Upgrade clang-tidy on CI. (#5469)

* Correct all clang-tidy errors.
* Upgrade clang-tidy to 10 on CI.

Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Jiaming Yuan
2020-04-05 04:42:29 +08:00
committed by GitHub
parent 30e94ddd04
commit 0012f2ef93
107 changed files with 932 additions and 903 deletions

View File

@@ -56,8 +56,8 @@ __forceinline__ __device__ BitFieldAtomicType AtomicAnd(BitFieldAtomicType* addr
*/
template <typename VT, typename Direction>
struct BitFieldContainer {
using value_type = VT;
using pointer = value_type*;
using value_type = VT; // NOLINT
using pointer = value_type*; // NOLINT
static value_type constexpr kValueSize = sizeof(value_type) * 8;
static value_type constexpr kOne = 1; // force correct type.
@@ -67,6 +67,7 @@ struct BitFieldContainer {
value_type bit_pos {0};
};
private:
common::Span<value_type> bits_;
static_assert(!std::is_signed<VT>::value, "Must use unsiged type as underlying storage.");
@@ -82,9 +83,12 @@ struct BitFieldContainer {
public:
BitFieldContainer() = default;
XGBOOST_DEVICE BitFieldContainer(common::Span<value_type> bits) : bits_{bits} {}
XGBOOST_DEVICE explicit BitFieldContainer(common::Span<value_type> bits) : bits_{bits} {}
XGBOOST_DEVICE BitFieldContainer(BitFieldContainer const& other) : bits_{other.bits_} {}
common::Span<value_type> Bits() { return bits_; }
common::Span<value_type const> Bits() const { return bits_; }
/*\brief Compute the size of needed memory allocation. The returned value is in terms
* of number of elements with `BitFieldContainer::value_type'.
*/
@@ -190,7 +194,7 @@ template <typename VT>
struct LBitsPolicy : public BitFieldContainer<VT, LBitsPolicy<VT>> {
using Container = BitFieldContainer<VT, LBitsPolicy<VT>>;
using Pos = typename Container::Pos;
using value_type = typename Container::value_type;
using value_type = typename Container::value_type; // NOLINT
XGBOOST_DEVICE static Pos Shift(Pos pos) {
pos.bit_pos = Container::kValueSize - pos.bit_pos - Container::kOne;
@@ -204,7 +208,7 @@ template <typename VT>
struct RBitsPolicy : public BitFieldContainer<VT, RBitsPolicy<VT>> {
using Container = BitFieldContainer<VT, RBitsPolicy<VT>>;
using Pos = typename Container::Pos;
using value_type = typename Container::value_type;
using value_type = typename Container::value_type; // NOLINT
XGBOOST_DEVICE static Pos Shift(Pos pos) {
return pos;

View File

@@ -141,7 +141,7 @@ class ColumnMatrix {
feature_offsets_[fid] = accum_index_;
}
SetTypeSize(gmat.max_num_bins_);
SetTypeSize(gmat.max_num_bins);
index_.resize(feature_offsets_[nfeature] * bins_type_size_, 0);
if (!all_dense) {
@@ -161,24 +161,24 @@ class ColumnMatrix {
// pre-fill index_ for dense columns
if (all_dense) {
BinTypeSize gmat_bin_size = gmat.index.getBinTypeSize();
if (gmat_bin_size == UINT8_BINS_TYPE_SIZE) {
BinTypeSize gmat_bin_size = gmat.index.GetBinTypeSize();
if (gmat_bin_size == kUint8BinsTypeSize) {
SetIndexAllDense(gmat.index.data<uint8_t>(), gmat, nrow, nfeature, noMissingValues);
} else if (gmat_bin_size == UINT16_BINS_TYPE_SIZE) {
} else if (gmat_bin_size == kUint16BinsTypeSize) {
SetIndexAllDense(gmat.index.data<uint16_t>(), gmat, nrow, nfeature, noMissingValues);
} else {
CHECK_EQ(gmat_bin_size, UINT32_BINS_TYPE_SIZE);
CHECK_EQ(gmat_bin_size, kUint32BinsTypeSize);
SetIndexAllDense(gmat.index.data<uint32_t>(), gmat, nrow, nfeature, noMissingValues);
}
/* For sparse DMatrix gmat.index.getBinTypeSize() returns always UINT32_BINS_TYPE_SIZE
/* For sparse DMatrix gmat.index.GetBinTypeSize() always returns kUint32BinsTypeSize
but for ColumnMatrix we still have a chance to reduce the memory consumption */
} else {
if (bins_type_size_ == UINT8_BINS_TYPE_SIZE) {
if (bins_type_size_ == kUint8BinsTypeSize) {
SetIndex<uint8_t>(gmat.index.data<uint32_t>(), gmat, nrow, nfeature);
} else if (bins_type_size_ == UINT16_BINS_TYPE_SIZE) {
} else if (bins_type_size_ == kUint16BinsTypeSize) {
SetIndex<uint16_t>(gmat.index.data<uint32_t>(), gmat, nrow, nfeature);
} else {
CHECK_EQ(bins_type_size_, UINT32_BINS_TYPE_SIZE);
CHECK_EQ(bins_type_size_, kUint32BinsTypeSize);
SetIndex<uint32_t>(gmat.index.data<uint32_t>(), gmat, nrow, nfeature);
}
}
@@ -187,11 +187,11 @@ class ColumnMatrix {
/* Set the number of bytes based on numeric limit of maximum number of bins provided by user */
void SetTypeSize(size_t max_num_bins) {
if ( (max_num_bins - 1) <= static_cast<int>(std::numeric_limits<uint8_t>::max()) ) {
bins_type_size_ = UINT8_BINS_TYPE_SIZE;
bins_type_size_ = kUint8BinsTypeSize;
} else if ((max_num_bins - 1) <= static_cast<int>(std::numeric_limits<uint16_t>::max())) {
bins_type_size_ = UINT16_BINS_TYPE_SIZE;
bins_type_size_ = kUint16BinsTypeSize;
} else {
bins_type_size_ = UINT32_BINS_TYPE_SIZE;
bins_type_size_ = kUint32BinsTypeSize;
}
}
@@ -227,7 +227,7 @@ class ColumnMatrix {
/* missing values make sense only for column with type kDenseColumn,
and if no missing values were observed it could be handled much faster. */
if (noMissingValues) {
const int32_t nthread = omp_get_max_threads();
const int32_t nthread = omp_get_max_threads(); // NOLINT
#pragma omp parallel for num_threads(nthread)
for (omp_ulong rid = 0; rid < nrow; ++rid) {
const size_t ibegin = rid*nfeature;
@@ -241,7 +241,7 @@ class ColumnMatrix {
} else {
/* to handle rows in all batches, sum of all batch sizes equal to gmat.row_ptr.size() - 1 */
size_t rbegin = 0;
for (const auto &batch : gmat.p_fmat_->GetBatches<SparsePage>()) {
for (const auto &batch : gmat.p_fmat->GetBatches<SparsePage>()) {
const xgboost::Entry* data_ptr = batch.data.HostVector().data();
const std::vector<bst_row_t>& offset_vec = batch.offset.HostVector();
const size_t batch_size = batch.Size();
@@ -276,7 +276,7 @@ class ColumnMatrix {
T* local_index = reinterpret_cast<T*>(&index_[0]);
size_t rbegin = 0;
for (const auto &batch : gmat.p_fmat_->GetBatches<SparsePage>()) {
for (const auto &batch : gmat.p_fmat->GetBatches<SparsePage>()) {
const xgboost::Entry* data_ptr = batch.data.HostVector().data();
const std::vector<bst_row_t>& offset_vec = batch.offset.HostVector();
const size_t batch_size = batch.Size();

View File

@@ -118,7 +118,7 @@ class Range {
XGBOOST_DEVICE explicit Iterator(DifferenceType start, DifferenceType step) :
i_{start}, step_{step} {}
public:
private:
int64_t i_;
DifferenceType step_ = 1;
};

View File

@@ -112,7 +112,7 @@ class CompressedBufferWriter {
size_t ibyte_start = ibit_start / 8, ibyte_end = ibit_end / 8;
symbol <<= 7 - ibit_end % 8;
for (ptrdiff_t ibyte = ibyte_end; ibyte >= (ptrdiff_t)ibyte_start; --ibyte) {
for (ptrdiff_t ibyte = ibyte_end; ibyte >= static_cast<ptrdiff_t>(ibyte_start); --ibyte) {
dh::AtomicOrByte(reinterpret_cast<unsigned int*>(buffer + detail::kPadding),
ibyte, symbol & 0xff);
symbol >>= 8;
@@ -182,14 +182,14 @@ class CompressedIterator {
typedef value_type reference; // NOLINT
private:
const CompressedByteT *buffer_;
size_t symbol_bits_;
size_t offset_;
const CompressedByteT *buffer_ {nullptr};
size_t symbol_bits_ {0};
size_t offset_ {0};
public:
CompressedIterator() : buffer_(nullptr), symbol_bits_(0), offset_(0) {}
CompressedIterator() = default;
CompressedIterator(const CompressedByteT *buffer, size_t num_symbols)
: buffer_(buffer), offset_(0) {
: buffer_(buffer) {
symbol_bits_ = detail::SymbolBits(num_symbols);
}

View File

@@ -29,8 +29,8 @@ class ConfigParser {
* \brief Constructor for INI-style configuration parser
* \param path path to configuration file
*/
explicit ConfigParser(const std::string& path)
: path_(path),
explicit ConfigParser(const std::string path)
: path_(std::move(path)),
line_comment_regex_("^#"),
key_regex_(R"rx(^([^#"'=\r\n\t ]+)[\t ]*=)rx"),
key_regex_escaped_(R"rx(^(["'])([^"'=\r\n]+)\1[\t ]*=)rx"),
@@ -58,12 +58,12 @@ class ConfigParser {
std::string NormalizeConfigEOL(std::string const& config_str) {
std::string result;
std::stringstream ss(config_str);
for (size_t i = 0; i < config_str.size(); ++i) {
if (config_str[i] == '\r') {
for (auto c : config_str) {
if (c == '\r') {
result.push_back('\n');
continue;
}
result.push_back(config_str[i]);
result.push_back(c);
}
return result;
}

View File

@@ -37,7 +37,7 @@ void AllReducer::Init(int _device_ordinal) {
#ifdef XGBOOST_USE_NCCL
LOG(DEBUG) << "Running nccl init on: " << __CUDACC_VER_MAJOR__ << "." << __CUDACC_VER_MINOR__;
device_ordinal = _device_ordinal;
device_ordinal_ = _device_ordinal;
int32_t const rank = rabit::GetRank();
#if __CUDACC_VER_MAJOR__ > 9
@@ -46,7 +46,7 @@ void AllReducer::Init(int _device_ordinal) {
std::vector<uint64_t> uuids(world * kUuidLength, 0);
auto s_uuid = xgboost::common::Span<uint64_t>{uuids.data(), uuids.size()};
auto s_this_uuid = s_uuid.subspan(rank * kUuidLength, kUuidLength);
GetCudaUUID(world, rank, device_ordinal, s_this_uuid);
GetCudaUUID(world, rank, device_ordinal_, s_this_uuid);
// No allgather yet.
rabit::Allreduce<rabit::op::Sum, uint64_t>(uuids.data(), uuids.size());
@@ -66,10 +66,10 @@ void AllReducer::Init(int _device_ordinal) {
<< "device is not supported";
#endif // __CUDACC_VER_MAJOR__ > 9
id = GetUniqueId();
dh::safe_cuda(cudaSetDevice(device_ordinal));
dh::safe_nccl(ncclCommInitRank(&comm, rabit::GetWorldSize(), id, rank));
safe_cuda(cudaStreamCreate(&stream));
id_ = GetUniqueId();
dh::safe_cuda(cudaSetDevice(device_ordinal_));
dh::safe_nccl(ncclCommInitRank(&comm_, rabit::GetWorldSize(), id_, rank));
safe_cuda(cudaStreamCreate(&stream_));
initialised_ = true;
#else
if (rabit::IsDistributed()) {
@@ -81,8 +81,8 @@ void AllReducer::Init(int _device_ordinal) {
AllReducer::~AllReducer() {
#ifdef XGBOOST_USE_NCCL
if (initialised_) {
dh::safe_cuda(cudaStreamDestroy(stream));
ncclCommDestroy(comm);
dh::safe_cuda(cudaStreamDestroy(stream_));
ncclCommDestroy(comm_);
}
if (xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {
LOG(CONSOLE) << "======== NCCL Statistics========";

View File

@@ -35,10 +35,10 @@
#include "../common/io.h"
#endif
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__)
#else // In device code and CUDA < 600
XGBOOST_DEVICE __forceinline__ double atomicAdd(double* address, double val) {
__device__ __forceinline__ double atomicAdd(double* address, double val) { // NOLINT
unsigned long long int* address_as_ull =
(unsigned long long int*)address; // NOLINT
unsigned long long int old = *address_as_ull, assumed; // NOLINT
@@ -141,7 +141,8 @@ inline void CheckComputeCapability() {
}
DEV_INLINE void AtomicOrByte(unsigned int* __restrict__ buffer, size_t ibyte, unsigned char b) {
atomicOr(&buffer[ibyte / sizeof(unsigned int)], (unsigned int)b << (ibyte % (sizeof(unsigned int)) * 8));
atomicOr(&buffer[ibyte / sizeof(unsigned int)],
static_cast<unsigned int>(b) << (ibyte % (sizeof(unsigned int)) * 8));
}
namespace internal {
@@ -174,7 +175,7 @@ CountNumItemsImpl(bool left, const T * __restrict__ items, uint32_t n, T v,
return left ? items_begin - items : items + n - items_begin;
}
}
} // namespace internal
/*!
* \brief Find the strict upper bound for an element in a sorted array
@@ -291,9 +292,9 @@ class LaunchKernel {
dim3 blocks_;
public:
LaunchKernel(uint32_t _grids, uint32_t _blk, size_t _shmem=0, cudaStream_t _s=0) :
LaunchKernel(uint32_t _grids, uint32_t _blk, size_t _shmem=0, cudaStream_t _s=nullptr) :
grids_{_grids, 1, 1}, blocks_{_blk, 1, 1}, shmem_size_{_shmem}, stream_{_s} {}
LaunchKernel(dim3 _grids, dim3 _blk, size_t _shmem=0, cudaStream_t _s=0) :
LaunchKernel(dim3 _grids, dim3 _blk, size_t _shmem=0, cudaStream_t _s=nullptr) :
grids_{_grids}, blocks_{_blk}, shmem_size_{_shmem}, stream_{_s} {}
template <typename K, typename... Args>
@@ -359,16 +360,18 @@ class MemoryLogger {
public:
void RegisterAllocation(void *ptr, size_t n) {
if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug))
if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {
return;
}
std::lock_guard<std::mutex> guard(mutex_);
int current_device;
safe_cuda(cudaGetDevice(&current_device));
stats_.RegisterAllocation(ptr, n);
}
void RegisterDeallocation(void *ptr, size_t n) {
if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug))
if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {
return;
}
std::lock_guard<std::mutex> guard(mutex_);
int current_device;
safe_cuda(cudaGetDevice(&current_device));
@@ -384,8 +387,9 @@ public:
}
void Log() {
if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug))
if (!xgboost::ConsoleLogger::ShouldLog(xgboost::ConsoleLogger::LV::kDebug)) {
return;
}
std::lock_guard<std::mutex> guard(mutex_);
int current_device;
safe_cuda(cudaGetDevice(&current_device));
@@ -396,7 +400,7 @@ public:
LOG(CONSOLE) << "Number of allocations: " << stats_.num_allocations;
}
};
};
} // namespace detail
inline detail::MemoryLogger &GlobalMemoryLogger() {
static detail::MemoryLogger memory_logger;
@@ -413,27 +417,27 @@ inline void DebugSyncDevice(std::string file="", int32_t line = -1) {
safe_cuda(cudaGetLastError());
}
namespace detail{
namespace detail {
/**
* \brief Default memory allocator, uses cudaMalloc/Free and logs allocations if verbose.
*/
template <class T>
struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
using super_t = thrust::device_malloc_allocator<T>;
using pointer = thrust::device_ptr<T>;
using SuperT = thrust::device_malloc_allocator<T>;
using pointer = thrust::device_ptr<T>; // NOLINT
template<typename U>
struct rebind
struct rebind // NOLINT
{
typedef XGBDefaultDeviceAllocatorImpl<U> other;
using other = XGBDefaultDeviceAllocatorImpl<U>; // NOLINT
};
pointer allocate(size_t n) {
pointer ptr = super_t::allocate(n);
pointer allocate(size_t n) { // NOLINT
pointer ptr = SuperT::allocate(n);
GlobalMemoryLogger().RegisterAllocation(ptr.get(), n * sizeof(T));
return ptr;
}
void deallocate(pointer ptr, size_t n) {
void deallocate(pointer ptr, size_t n) { // NOLINT
GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
return super_t::deallocate(ptr, n);
return SuperT::deallocate(ptr, n);
}
};
@@ -442,11 +446,11 @@ struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
*/
template <class T>
struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
using pointer = thrust::device_ptr<T>;
using pointer = thrust::device_ptr<T>; // NOLINT
template<typename U>
struct rebind
struct rebind // NOLINT
{
typedef XGBCachingDeviceAllocatorImpl<U> other;
using other = XGBCachingDeviceAllocatorImpl<U>; // NOLINT
};
cub::CachingDeviceAllocator& GetGlobalCachingAllocator ()
{
@@ -455,7 +459,7 @@ struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
static cub::CachingDeviceAllocator *allocator = new cub::CachingDeviceAllocator(2, 9, 29);
return *allocator;
}
pointer allocate(size_t n) {
pointer allocate(size_t n) { // NOLINT
T *ptr;
GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void **>(&ptr),
n * sizeof(T));
@@ -463,17 +467,17 @@ struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
GlobalMemoryLogger().RegisterAllocation(thrust_ptr.get(), n * sizeof(T));
return thrust_ptr;
}
void deallocate(pointer ptr, size_t n) {
void deallocate(pointer ptr, size_t n) { // NOLINT
GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
GetGlobalCachingAllocator().DeviceFree(ptr.get());
}
__host__ __device__
void construct(T *)
void construct(T *) // NOLINT
{
// no-op
}
};
};
} // namespace detail
// Declare xgboost allocators
// Replacement of allocator with custom backend should occur here
@@ -486,9 +490,9 @@ template <typename T>
using XGBCachingDeviceAllocator = detail::XGBCachingDeviceAllocatorImpl<T>;
/** \brief Specialisation of thrust device vector using custom allocator. */
template <typename T>
using device_vector = thrust::device_vector<T, XGBDeviceAllocator<T>>;
using device_vector = thrust::device_vector<T, XGBDeviceAllocator<T>>; // NOLINT
template <typename T>
using caching_device_vector = thrust::device_vector<T, XGBCachingDeviceAllocator<T>>;
using caching_device_vector = thrust::device_vector<T, XGBCachingDeviceAllocator<T>>; // NOLINT
/**
* \brief A double buffer, useful for algorithms like sort.
@@ -517,7 +521,7 @@ class DoubleBuffer {
return xgboost::common::Span<T>{buff.Current(), Size()};
}
T *other() { return buff.Alternate(); }
T *Other() { return buff.Alternate(); }
};
/**
@@ -688,7 +692,9 @@ class BulkAllocator {
template <typename... Args>
void Allocate(int device_idx, Args... args) {
if (device_idx_ == -1) device_idx_ = device_idx;
if (device_idx_ == -1) {
device_idx_ = device_idx;
}
else CHECK(device_idx_ == device_idx);
size_t size = GetSizeBytes(args...);
@@ -728,13 +734,13 @@ struct PinnedMemory {
// Keep track of cub library device allocation
struct CubMemory {
void *d_temp_storage;
size_t temp_storage_bytes;
void *d_temp_storage { nullptr };
size_t temp_storage_bytes { 0 };
// Thrust
using value_type = char; // NOLINT
CubMemory() : d_temp_storage(nullptr), temp_storage_bytes(0) {}
CubMemory() = default;
~CubMemory() { Free(); }
@@ -818,7 +824,7 @@ __global__ void LbsKernel(CoordinateT *d_coordinates,
cub::CountingInputIterator<OffsetT> tile_element_indices(tile_start_coord.y);
CoordinateT thread_start_coord;
typedef typename std::iterator_traits<SegmentIterT>::value_type SegmentT;
using SegmentT = typename std::iterator_traits<SegmentIterT>::value_type;
__shared__ struct {
SegmentT tile_segment_end_offsets[TILE_SIZE + 1];
SegmentT output_segment[TILE_SIZE];
@@ -862,7 +868,7 @@ template <typename FunctionT, typename SegmentIterT, typename OffsetT>
void SparseTransformLbs(int device_idx, dh::CubMemory *temp_memory,
OffsetT count, SegmentIterT segments,
OffsetT num_segments, FunctionT f) {
typedef typename cub::CubVector<OffsetT, 2>::Type CoordinateT;
using CoordinateT = typename cub::CubVector<OffsetT, 2>::Type;
dh::safe_cuda(cudaSetDevice(device_idx));
const int BLOCK_THREADS = 256;
const int ITEMS_PER_THREAD = 1;
@@ -961,13 +967,13 @@ void SegmentedSort(dh::CubMemory *tmp_mem, dh::DoubleBuffer<T1> *keys,
* @param nVals number of elements in the input array
*/
template <typename T>
void SumReduction(dh::CubMemory &tmp_mem, xgboost::common::Span<T> in, xgboost::common::Span<T> out,
void SumReduction(dh::CubMemory* tmp_mem, xgboost::common::Span<T> in, xgboost::common::Span<T> out,
int nVals) {
size_t tmpSize;
dh::safe_cuda(
cub::DeviceReduce::Sum(NULL, tmpSize, in.data(), out.data(), nVals));
tmp_mem.LazyAllocate(tmpSize);
dh::safe_cuda(cub::DeviceReduce::Sum(tmp_mem.d_temp_storage, tmpSize,
tmp_mem->LazyAllocate(tmpSize);
dh::safe_cuda(cub::DeviceReduce::Sum(tmp_mem->d_temp_storage, tmpSize,
in.data(), out.data(), nVals));
}
@@ -980,20 +986,20 @@ void SumReduction(dh::CubMemory &tmp_mem, xgboost::common::Span<T> in, xgboost::
*/
template <typename T>
typename std::iterator_traits<T>::value_type SumReduction(
dh::CubMemory &tmp_mem, T in, int nVals) {
dh::CubMemory* tmp_mem, T in, int nVals) {
using ValueT = typename std::iterator_traits<T>::value_type;
size_t tmpSize {0};
ValueT *dummy_out = nullptr;
dh::safe_cuda(cub::DeviceReduce::Sum(nullptr, tmpSize, in, dummy_out, nVals));
// Allocate small extra memory for the return value
tmp_mem.LazyAllocate(tmpSize + sizeof(ValueT));
auto ptr = reinterpret_cast<ValueT *>(tmp_mem.d_temp_storage) + 1;
tmp_mem->LazyAllocate(tmpSize + sizeof(ValueT));
auto ptr = reinterpret_cast<ValueT *>(tmp_mem->d_temp_storage) + 1;
dh::safe_cuda(cub::DeviceReduce::Sum(
reinterpret_cast<void *>(ptr), tmpSize, in,
reinterpret_cast<ValueT *>(tmp_mem.d_temp_storage),
reinterpret_cast<ValueT *>(tmp_mem->d_temp_storage),
nVals));
ValueT sum;
dh::safe_cuda(cudaMemcpy(&sum, tmp_mem.d_temp_storage, sizeof(ValueT),
dh::safe_cuda(cudaMemcpy(&sum, tmp_mem->d_temp_storage, sizeof(ValueT),
cudaMemcpyDeviceToHost));
return sum;
}
@@ -1079,20 +1085,19 @@ class SaveCudaContext {
* this is a dummy class that will error if used with more than one GPU.
*/
class AllReducer {
bool initialised_;
size_t allreduce_bytes_; // Keep statistics of the number of bytes communicated
size_t allreduce_calls_; // Keep statistics of the number of reduce calls
std::vector<size_t> host_data; // Used for all reduce on host
bool initialised_ {false};
size_t allreduce_bytes_ {0}; // Keep statistics of the number of bytes communicated
size_t allreduce_calls_ {0}; // Keep statistics of the number of reduce calls
std::vector<size_t> host_data_; // Used for all reduce on host
#ifdef XGBOOST_USE_NCCL
ncclComm_t comm;
cudaStream_t stream;
int device_ordinal;
ncclUniqueId id;
ncclComm_t comm_;
cudaStream_t stream_;
int device_ordinal_;
ncclUniqueId id_;
#endif
public:
AllReducer() : initialised_(false), allreduce_bytes_(0),
allreduce_calls_(0) {}
AllReducer() = default;
/**
* \brief Initialise with the desired device ordinal for this communication
@@ -1116,8 +1121,8 @@ class AllReducer {
void AllReduceSum(const double *sendbuff, double *recvbuff, int count) {
#ifdef XGBOOST_USE_NCCL
CHECK(initialised_);
dh::safe_cuda(cudaSetDevice(device_ordinal));
dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclDouble, ncclSum, comm, stream));
dh::safe_cuda(cudaSetDevice(device_ordinal_));
dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclDouble, ncclSum, comm_, stream_));
allreduce_bytes_ += count * sizeof(double);
allreduce_calls_ += 1;
#endif
@@ -1135,8 +1140,8 @@ class AllReducer {
void AllReduceSum(const float *sendbuff, float *recvbuff, int count) {
#ifdef XGBOOST_USE_NCCL
CHECK(initialised_);
dh::safe_cuda(cudaSetDevice(device_ordinal));
dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclFloat, ncclSum, comm, stream));
dh::safe_cuda(cudaSetDevice(device_ordinal_));
dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclFloat, ncclSum, comm_, stream_));
allreduce_bytes_ += count * sizeof(float);
allreduce_calls_ += 1;
#endif
@@ -1156,8 +1161,8 @@ class AllReducer {
#ifdef XGBOOST_USE_NCCL
CHECK(initialised_);
dh::safe_cuda(cudaSetDevice(device_ordinal));
dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclInt64, ncclSum, comm, stream));
dh::safe_cuda(cudaSetDevice(device_ordinal_));
dh::safe_nccl(ncclAllReduce(sendbuff, recvbuff, count, ncclInt64, ncclSum, comm_, stream_));
#endif
}
@@ -1168,8 +1173,8 @@ class AllReducer {
*/
void Synchronize() {
#ifdef XGBOOST_USE_NCCL
dh::safe_cuda(cudaSetDevice(device_ordinal));
dh::safe_cuda(cudaStreamSynchronize(stream));
dh::safe_cuda(cudaSetDevice(device_ordinal_));
dh::safe_cuda(cudaStreamSynchronize(stream_));
#endif
};
@@ -1183,15 +1188,15 @@ class AllReducer {
* \return the Unique ID
*/
ncclUniqueId GetUniqueId() {
static const int RootRank = 0;
static const int kRootRank = 0;
ncclUniqueId id;
if (rabit::GetRank() == RootRank) {
if (rabit::GetRank() == kRootRank) {
dh::safe_nccl(ncclGetUniqueId(&id));
}
rabit::Broadcast(
(void*)&id,
(size_t)sizeof(ncclUniqueId),
(int)RootRank);
static_cast<void*>(&id),
sizeof(ncclUniqueId),
static_cast<int>(kRootRank));
return id;
}
#endif
@@ -1202,18 +1207,18 @@ class AllReducer {
void HostMaxAllReduce(std::vector<size_t> *p_data) {
#ifdef XGBOOST_USE_NCCL
auto &data = *p_data;
// Wait in case some other thread is accessing host_data
// Wait in case some other thread is accessing host_data_
#pragma omp barrier
// Reset shared buffer
#pragma omp single
{
host_data.resize(data.size());
std::fill(host_data.begin(), host_data.end(), size_t(0));
host_data_.resize(data.size());
std::fill(host_data_.begin(), host_data_.end(), size_t(0));
}
// Threads update shared array
for (auto i = 0ull; i < data.size(); i++) {
#pragma omp critical
{ host_data[i] = std::max(host_data[i], data[i]); }
{ host_data_[i] = std::max(host_data_[i], data[i]); }
}
// Wait until all threads are finished
#pragma omp barrier
@@ -1221,15 +1226,15 @@ class AllReducer {
// One thread performs all reduce across distributed nodes
#pragma omp master
{
rabit::Allreduce<rabit::op::Max, size_t>(host_data.data(),
host_data.size());
rabit::Allreduce<rabit::op::Max, size_t>(host_data_.data(),
host_data_.size());
}
#pragma omp barrier
// Threads can now read back all reduced values
for (auto i = 0ull; i < data.size(); i++) {
data[i] = host_data[i];
data[i] = host_data_[i];
}
#endif
}
@@ -1264,12 +1269,12 @@ thrust::device_ptr<T> tend(xgboost::HostDeviceVector<T>& vector) { // // NOLINT
}
template <typename T>
thrust::device_ptr<T const> tcbegin(xgboost::HostDeviceVector<T> const& vector) {
thrust::device_ptr<T const> tcbegin(xgboost::HostDeviceVector<T> const& vector) { // NOLINT
return thrust::device_ptr<T const>(vector.ConstDevicePointer());
}
template <typename T>
thrust::device_ptr<T const> tcend(xgboost::HostDeviceVector<T> const& vector) {
thrust::device_ptr<T const> tcend(xgboost::HostDeviceVector<T> const& vector) { // NOLINT
return tcbegin(vector) + vector.Size();
}
@@ -1279,17 +1284,17 @@ thrust::device_ptr<T> tbegin(xgboost::common::Span<T>& span) { // NOLINT
}
template <typename T>
thrust::device_ptr<T> tend(xgboost::common::Span<T>& span) { // // NOLINT
thrust::device_ptr<T> tend(xgboost::common::Span<T>& span) { // NOLINT
return tbegin(span) + span.size();
}
template <typename T>
thrust::device_ptr<T const> tcbegin(xgboost::common::Span<T> const& span) {
thrust::device_ptr<T const> tcbegin(xgboost::common::Span<T> const& span) { // NOLINT
return thrust::device_ptr<T const>(span.data());
}
template <typename T>
thrust::device_ptr<T const> tcend(xgboost::common::Span<T> const& span) {
thrust::device_ptr<T const> tcend(xgboost::common::Span<T> const& span) { // NOLINT
return tcbegin(span) + span.size();
}
@@ -1465,9 +1470,9 @@ class SegmentSorter {
template <typename FunctionT>
class LauncherItr {
public:
int idx;
int idx { 0 };
FunctionT f;
XGBOOST_DEVICE LauncherItr() : idx(0) {}
XGBOOST_DEVICE LauncherItr() : idx(0) {} // NOLINT
XGBOOST_DEVICE LauncherItr(int idx, FunctionT f) : idx(idx), f(f) {}
XGBOOST_DEVICE LauncherItr &operator=(int output) {
f(idx, output);
@@ -1493,7 +1498,7 @@ public:
using value_type = void; // NOLINT
using pointer = value_type *; // NOLINT
using reference = LauncherItr<FunctionT>; // NOLINT
using iterator_category = typename thrust::detail::iterator_facade_category<
using iterator_category = typename thrust::detail::iterator_facade_category< // NOLINT
thrust::any_system_tag, thrust::random_access_traversal_tag, value_type,
reference>::type; // NOLINT
private:

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2017-2019 by Contributors
* Copyright 2017-2020 by Contributors
* \file hist_util.cc
*/
#include <dmlc/timer.h>
@@ -11,10 +11,10 @@
#include "xgboost/base.h"
#include "../common/common.h"
#include "./hist_util.h"
#include "./random.h"
#include "./column_matrix.h"
#include "./quantile.h"
#include "hist_util.h"
#include "random.h"
#include "column_matrix.h"
#include "quantile.h"
#include "./../tree/updater_quantile_hist.h"
#if defined(XGBOOST_MM_PREFETCH_PRESENT)
@@ -99,16 +99,16 @@ void GHistIndexMatrix::SetIndexDataForSparse(common::Span<uint32_t> index_data_s
void GHistIndexMatrix::ResizeIndex(const size_t rbegin, const SparsePage& batch,
const size_t n_offsets, const size_t n_index,
const bool isDense) {
if ((max_num_bins_ - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) && isDense) {
index.setBinTypeSize(UINT8_BINS_TYPE_SIZE);
index.resize((sizeof(uint8_t)) * n_index);
} else if ((max_num_bins_ - 1 > static_cast<int>(std::numeric_limits<uint8_t>::max()) &&
max_num_bins_ - 1 <= static_cast<int>(std::numeric_limits<uint16_t>::max())) && isDense) {
index.setBinTypeSize(UINT16_BINS_TYPE_SIZE);
index.resize((sizeof(uint16_t)) * n_index);
if ((max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) && isDense) {
index.SetBinTypeSize(kUint8BinsTypeSize);
index.Resize((sizeof(uint8_t)) * n_index);
} else if ((max_num_bins - 1 > static_cast<int>(std::numeric_limits<uint8_t>::max()) &&
max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint16_t>::max())) && isDense) {
index.SetBinTypeSize(kUint16BinsTypeSize);
index.Resize((sizeof(uint16_t)) * n_index);
} else {
index.setBinTypeSize(UINT32_BINS_TYPE_SIZE);
index.resize((sizeof(uint32_t)) * n_index);
index.SetBinTypeSize(kUint32BinsTypeSize);
index.Resize((sizeof(uint32_t)) * n_index);
}
}
@@ -449,15 +449,15 @@ void DenseCuts::Init
monitor_.Stop(__func__);
}
void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
cut.Build(p_fmat, max_num_bins);
max_num_bins_ = max_num_bins;
void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_bins) {
cut.Build(p_fmat, max_bins);
max_num_bins = max_bins;
const int32_t nthread = omp_get_max_threads();
const uint32_t nbins = cut.Ptrs().back();
hit_count.resize(nbins, 0);
hit_count_tloc_.resize(nthread * nbins, 0);
this->p_fmat_ = p_fmat;
this->p_fmat = p_fmat;
size_t new_size = 1;
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
new_size += batch.Size();
@@ -524,24 +524,24 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
uint32_t* offsets = nullptr;
if (isDense) {
index.resizeOffset(n_offsets);
offsets = index.offset();
index.ResizeOffset(n_offsets);
offsets = index.Offset();
for (size_t i = 0; i < n_offsets; ++i) {
offsets[i] = cut.Ptrs()[i];
}
}
if (isDense) {
BinTypeSize curent_bin_size = index.getBinTypeSize();
BinTypeSize curent_bin_size = index.GetBinTypeSize();
common::Span<const uint32_t> offsets_span = {offsets, n_offsets};
if (curent_bin_size == UINT8_BINS_TYPE_SIZE) {
if (curent_bin_size == kUint8BinsTypeSize) {
common::Span<uint8_t> index_data_span = {index.data<uint8_t>(), n_index};
SetIndexDataForDense(index_data_span, batch_threads, batch, rbegin, offsets_span, nbins);
} else if (curent_bin_size == UINT16_BINS_TYPE_SIZE) {
} else if (curent_bin_size == kUint16BinsTypeSize) {
common::Span<uint16_t> index_data_span = {index.data<uint16_t>(), n_index};
SetIndexDataForDense(index_data_span, batch_threads, batch, rbegin, offsets_span, nbins);
} else {
CHECK_EQ(curent_bin_size, UINT32_BINS_TYPE_SIZE);
CHECK_EQ(curent_bin_size, kUint32BinsTypeSize);
common::Span<uint32_t> index_data_span = {index.data<uint32_t>(), n_index};
SetIndexDataForDense(index_data_span, batch_threads, batch, rbegin, offsets_span, nbins);
}
@@ -689,16 +689,16 @@ FindGroups(const std::vector<unsigned>& feature_list,
}
BinTypeSize bins_type_size = colmat.GetTypeSize();
if (bins_type_size == UINT8_BINS_TYPE_SIZE) {
if (bins_type_size == kUint8BinsTypeSize) {
const auto column = colmat.GetColumn<uint8_t>(fid);
SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups,
&group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow);
} else if (bins_type_size == UINT16_BINS_TYPE_SIZE) {
} else if (bins_type_size == kUint16BinsTypeSize) {
const auto column = colmat.GetColumn<uint16_t>(fid);
SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups,
&group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow);
} else {
CHECK_EQ(bins_type_size, UINT32_BINS_TYPE_SIZE);
CHECK_EQ(bins_type_size, kUint32BinsTypeSize);
const auto column = colmat.GetColumn<uint32_t>(fid);
SetGroup(fid, *(column.get()), max_conflict_cnt, search_groups,
&group_conflict_cnt, &conflict_marks, &groups, &group_nnz, cur_fid_nnz, nrow);
@@ -909,7 +909,7 @@ void BuildHistDenseKernel(const std::vector<GradientPair>& gpair,
const size_t* rid = row_indices.begin;
const float* pgh = reinterpret_cast<const float*>(gpair.data());
const BinIdxType* gradient_index = gmat.index.data<BinIdxType>();
const uint32_t* offsets = gmat.index.offset();
const uint32_t* offsets = gmat.index.Offset();
FPType* hist_data = reinterpret_cast<FPType*>(hist.data());
const uint32_t two {2}; // Each element from 'gpair' and 'hist' contains
// 2 FP values: gradient and hessian.
@@ -1000,16 +1000,16 @@ void BuildHistKernel(const std::vector<GradientPair>& gpair,
const RowSetCollection::Elem row_indices,
const GHistIndexMatrix& gmat, const bool isDense, GHistRow hist) {
const bool is_dense = row_indices.Size() && isDense;
switch (gmat.index.getBinTypeSize()) {
case UINT8_BINS_TYPE_SIZE:
switch (gmat.index.GetBinTypeSize()) {
case kUint8BinsTypeSize:
BuildHistDispatchKernel<FPType, do_prefetch, uint8_t>(gpair, row_indices,
gmat, hist, is_dense);
break;
case UINT16_BINS_TYPE_SIZE:
case kUint16BinsTypeSize:
BuildHistDispatchKernel<FPType, do_prefetch, uint16_t>(gpair, row_indices,
gmat, hist, is_dense);
break;
case UINT32_BINS_TYPE_SIZE:
case kUint32BinsTypeSize:
BuildHistDispatchKernel<FPType, do_prefetch, uint32_t>(gpair, row_indices,
gmat, hist, is_dense);
break;

View File

@@ -45,9 +45,10 @@ class HistogramCuts {
common::Monitor monitor_;
public:
HostDeviceVector<bst_float> cut_values_;
HostDeviceVector<uint32_t> cut_ptrs_;
HostDeviceVector<float> min_vals_; // storing minimum value in a sketch set.
HostDeviceVector<bst_float> cut_values_; // NOLINT
HostDeviceVector<uint32_t> cut_ptrs_; // NOLINT
// storing minimum value in a sketch set.
HostDeviceVector<float> min_vals_; // NOLINT
HistogramCuts();
HistogramCuts(HistogramCuts const& that) {
@@ -211,14 +212,14 @@ HistogramCuts AdapterDeviceSketch(AdapterT* adapter, int num_bins,
enum BinTypeSize {
UINT8_BINS_TYPE_SIZE = 1,
UINT16_BINS_TYPE_SIZE = 2,
UINT32_BINS_TYPE_SIZE = 4
kUint8BinsTypeSize = 1,
kUint16BinsTypeSize = 2,
kUint32BinsTypeSize = 4
};
struct Index {
Index(): binTypeSize_(UINT8_BINS_TYPE_SIZE), p_(1), offset_ptr_(nullptr) {
setBinTypeSize(binTypeSize_);
Index() {
SetBinTypeSize(binTypeSize_);
}
Index(const Index& i) = delete;
Index& operator=(Index i) = delete;
@@ -231,75 +232,75 @@ struct Index {
return func_(data_ptr_, i);
}
}
void setBinTypeSize(BinTypeSize binTypeSize) {
void SetBinTypeSize(BinTypeSize binTypeSize) {
binTypeSize_ = binTypeSize;
switch (binTypeSize) {
case UINT8_BINS_TYPE_SIZE:
func_ = &getValueFromUint8;
case kUint8BinsTypeSize:
func_ = &GetValueFromUint8;
break;
case UINT16_BINS_TYPE_SIZE:
func_ = &getValueFromUint16;
case kUint16BinsTypeSize:
func_ = &GetValueFromUint16;
break;
case UINT32_BINS_TYPE_SIZE:
func_ = &getValueFromUint32;
case kUint32BinsTypeSize:
func_ = &GetValueFromUint32;
break;
default:
CHECK(binTypeSize == UINT8_BINS_TYPE_SIZE ||
binTypeSize == UINT16_BINS_TYPE_SIZE ||
binTypeSize == UINT32_BINS_TYPE_SIZE);
CHECK(binTypeSize == kUint8BinsTypeSize ||
binTypeSize == kUint16BinsTypeSize ||
binTypeSize == kUint32BinsTypeSize);
}
}
BinTypeSize getBinTypeSize() const {
BinTypeSize GetBinTypeSize() const {
return binTypeSize_;
}
template<typename T>
T* data() const {
T* data() const { // NOLINT
return static_cast<T*>(data_ptr_);
}
uint32_t* offset() const {
uint32_t* Offset() const {
return offset_ptr_;
}
size_t offsetSize() const {
size_t OffsetSize() const {
return offset_.size();
}
size_t size() const {
size_t Size() const {
return data_.size() / (binTypeSize_);
}
void resize(const size_t nBytesData) {
void Resize(const size_t nBytesData) {
data_.resize(nBytesData);
data_ptr_ = reinterpret_cast<void*>(data_.data());
}
void resizeOffset(const size_t nDisps) {
void ResizeOffset(const size_t nDisps) {
offset_.resize(nDisps);
offset_ptr_ = offset_.data();
p_ = nDisps;
}
std::vector<uint8_t>::const_iterator begin() const {
std::vector<uint8_t>::const_iterator begin() const { // NOLINT
return data_.begin();
}
std::vector<uint8_t>::const_iterator end() const {
std::vector<uint8_t>::const_iterator end() const { // NOLINT
return data_.end();
}
private:
static uint32_t getValueFromUint8(void *t, size_t i) {
static uint32_t GetValueFromUint8(void *t, size_t i) {
return reinterpret_cast<uint8_t*>(t)[i];
}
static uint32_t getValueFromUint16(void* t, size_t i) {
static uint32_t GetValueFromUint16(void* t, size_t i) {
return reinterpret_cast<uint16_t*>(t)[i];
}
static uint32_t getValueFromUint32(void* t, size_t i) {
static uint32_t GetValueFromUint32(void* t, size_t i) {
return reinterpret_cast<uint32_t*>(t)[i];
}
typedef uint32_t (*Func)(void*, size_t);
using Func = uint32_t (*)(void*, size_t);
std::vector<uint8_t> data_;
std::vector<uint32_t> offset_; // size of this field is equal to number of features
void* data_ptr_;
BinTypeSize binTypeSize_;
size_t p_;
uint32_t* offset_ptr_;
BinTypeSize binTypeSize_ {kUint8BinsTypeSize};
size_t p_ {1};
uint32_t* offset_ptr_ {nullptr};
Func func_;
};
@@ -319,8 +320,8 @@ struct GHistIndexMatrix {
std::vector<size_t> hit_count;
/*! \brief The corresponding cuts */
HistogramCuts cut;
DMatrix* p_fmat_;
size_t max_num_bins_;
DMatrix* p_fmat;
size_t max_num_bins;
// Create a global histogram matrix, given cut
void Init(DMatrix* p_fmat, int max_num_bins);
@@ -668,7 +669,7 @@ class ParallelGHistBuilder {
*/
class GHistBuilder {
public:
GHistBuilder() : nthread_{0}, nbins_{0} {}
GHistBuilder() = default;
GHistBuilder(size_t nthread, uint32_t nbins) : nthread_{nthread}, nbins_{nbins} {}
// construct a histogram via histogram aggregation
@@ -691,9 +692,9 @@ class GHistBuilder {
private:
/*! \brief number of threads for parallel computation */
size_t nthread_;
size_t nthread_ { 0 };
/*! \brief number of all bins over all features */
uint32_t nbins_;
uint32_t nbins_ { 0 };
};

View File

@@ -20,7 +20,7 @@ void JsonWriter::Save(Json json) {
void JsonWriter::Visit(JsonArray const* arr) {
this->Write("[");
auto const& vec = arr->getArray();
auto const& vec = arr->GetArray();
size_t size = vec.size();
for (size_t i = 0; i < size; ++i) {
auto const& value = vec[i];
@@ -36,9 +36,9 @@ void JsonWriter::Visit(JsonObject const* obj) {
this->NewLine();
size_t i = 0;
size_t size = obj->getObject().size();
size_t size = obj->GetObject().size();
for (auto& value : obj->getObject()) {
for (auto& value : obj->GetObject()) {
this->Write("\"" + value.first + "\":");
this->Save(value.second);
@@ -54,14 +54,14 @@ void JsonWriter::Visit(JsonObject const* obj) {
}
void JsonWriter::Visit(JsonNumber const* num) {
convertor_ << num->getNumber();
convertor_ << num->GetNumber();
auto const& str = convertor_.str();
this->Write(StringView{str.c_str(), str.size()});
convertor_.str("");
}
void JsonWriter::Visit(JsonInteger const* num) {
convertor_ << num->getInteger();
convertor_ << num->GetInteger();
auto const& str = convertor_.str();
this->Write(StringView{str.c_str(), str.size()});
convertor_.str("");
@@ -74,7 +74,7 @@ void JsonWriter::Visit(JsonNull const* null) {
void JsonWriter::Visit(JsonString const* str) {
std::string buffer;
buffer += '"';
auto const& string = str->getString();
auto const& string = str->GetString();
for (size_t i = 0; i < string.length(); i++) {
const char ch = string[i];
if (ch == '\\') {
@@ -109,7 +109,7 @@ void JsonWriter::Visit(JsonString const* str) {
}
void JsonWriter::Visit(JsonBoolean const* boolean) {
bool val = boolean->getBoolean();
bool val = boolean->GetBoolean();
if (val) {
this->Write(u8"true");
} else {
@@ -120,13 +120,13 @@ void JsonWriter::Visit(JsonBoolean const* boolean) {
// Value
std::string Value::TypeStr() const {
switch (kind_) {
case ValueKind::String: return "String"; break;
case ValueKind::Number: return "Number"; break;
case ValueKind::Object: return "Object"; break;
case ValueKind::Array: return "Array"; break;
case ValueKind::Boolean: return "Boolean"; break;
case ValueKind::Null: return "Null"; break;
case ValueKind::Integer: return "Integer"; break;
case ValueKind::kString: return "String"; break;
case ValueKind::kNumber: return "Number"; break;
case ValueKind::kObject: return "Object"; break;
case ValueKind::kArray: return "Array"; break;
case ValueKind::kBoolean: return "Boolean"; break;
case ValueKind::kNull: return "Null"; break;
case ValueKind::kInteger: return "Integer"; break;
}
return "";
}
@@ -140,10 +140,10 @@ Json& DummyJsonObject() {
// Json Object
JsonObject::JsonObject(JsonObject && that) :
Value(ValueKind::Object), object_{std::move(that.object_)} {}
Value(ValueKind::kObject), object_{std::move(that.object_)} {}
JsonObject::JsonObject(std::map<std::string, Json>&& object)
: Value(ValueKind::Object), object_{std::move(object)} {}
: Value(ValueKind::kObject), object_{std::move(object)} {}
Json& JsonObject::operator[](std::string const & key) {
return object_[key];
@@ -157,12 +157,12 @@ Json& JsonObject::operator[](int ind) {
bool JsonObject::operator==(Value const& rhs) const {
if (!IsA<JsonObject>(&rhs)) { return false; }
return object_ == Cast<JsonObject const>(&rhs)->getObject();
return object_ == Cast<JsonObject const>(&rhs)->GetObject();
}
Value& JsonObject::operator=(Value const &rhs) {
JsonObject const* casted = Cast<JsonObject const>(&rhs);
object_ = casted->getObject();
object_ = casted->GetObject();
return *this;
}
@@ -186,12 +186,12 @@ Json& JsonString::operator[](int ind) {
bool JsonString::operator==(Value const& rhs) const {
if (!IsA<JsonString>(&rhs)) { return false; }
return Cast<JsonString const>(&rhs)->getString() == str_;
return Cast<JsonString const>(&rhs)->GetString() == str_;
}
Value & JsonString::operator=(Value const &rhs) {
JsonString const* casted = Cast<JsonString const>(&rhs);
str_ = casted->getString();
str_ = casted->GetString();
return *this;
}
@@ -202,7 +202,7 @@ void JsonString::Save(JsonWriter* writer) {
// Json Array
JsonArray::JsonArray(JsonArray && that) :
Value(ValueKind::Array), vec_{std::move(that.vec_)} {}
Value(ValueKind::kArray), vec_{std::move(that.vec_)} {}
Json& JsonArray::operator[](std::string const & key) {
LOG(FATAL) << "Object of type "
@@ -216,13 +216,13 @@ Json& JsonArray::operator[](int ind) {
bool JsonArray::operator==(Value const& rhs) const {
if (!IsA<JsonArray>(&rhs)) { return false; }
auto& arr = Cast<JsonArray const>(&rhs)->getArray();
auto& arr = Cast<JsonArray const>(&rhs)->GetArray();
return std::equal(arr.cbegin(), arr.cend(), vec_.cbegin());
}
Value & JsonArray::operator=(Value const &rhs) {
JsonArray const* casted = Cast<JsonArray const>(&rhs);
vec_ = casted->getArray();
vec_ = casted->GetArray();
return *this;
}
@@ -245,12 +245,12 @@ Json& JsonNumber::operator[](int ind) {
bool JsonNumber::operator==(Value const& rhs) const {
if (!IsA<JsonNumber>(&rhs)) { return false; }
return std::abs(number_ - Cast<JsonNumber const>(&rhs)->getNumber()) < kRtEps;
return std::abs(number_ - Cast<JsonNumber const>(&rhs)->GetNumber()) < kRtEps;
}
Value & JsonNumber::operator=(Value const &rhs) {
JsonNumber const* casted = Cast<JsonNumber const>(&rhs);
number_ = casted->getNumber();
number_ = casted->GetNumber();
return *this;
}
@@ -273,12 +273,12 @@ Json& JsonInteger::operator[](int ind) {
bool JsonInteger::operator==(Value const& rhs) const {
if (!IsA<JsonInteger>(&rhs)) { return false; }
return integer_ == Cast<JsonInteger const>(&rhs)->getInteger();
return integer_ == Cast<JsonInteger const>(&rhs)->GetInteger();
}
Value & JsonInteger::operator=(Value const &rhs) {
JsonInteger const* casted = Cast<JsonInteger const>(&rhs);
integer_ = casted->getInteger();
integer_ = casted->GetInteger();
return *this;
}
@@ -328,12 +328,12 @@ Json& JsonBoolean::operator[](int ind) {
bool JsonBoolean::operator==(Value const& rhs) const {
if (!IsA<JsonBoolean>(&rhs)) { return false; }
return boolean_ == Cast<JsonBoolean const>(&rhs)->getBoolean();
return boolean_ == Cast<JsonBoolean const>(&rhs)->GetBoolean();
}
Value & JsonBoolean::operator=(Value const &rhs) {
JsonBoolean const* casted = Cast<JsonBoolean const>(&rhs);
boolean_ = casted->getBoolean();
boolean_ = casted->GetBoolean();
return *this;
}

View File

@@ -36,19 +36,19 @@ namespace xgboost {
*/
class TrainingObserver {
#if defined(XGBOOST_USE_DEBUG_OUTPUT)
bool constexpr static observe_ {true};
bool constexpr static kObserve {true};
#else
bool constexpr static observe_ {false};
bool constexpr static kObserve {false};
#endif // defined(XGBOOST_USE_DEBUG_OUTPUT)
public:
void Update(int32_t iter) const {
if (XGBOOST_EXPECT(!observe_, true)) { return; }
if (XGBOOST_EXPECT(!kObserve, true)) { return; }
OBSERVER_PRINT << "Iter: " << iter << OBSERVER_ENDL;
}
/*\brief Observe tree. */
void Observe(RegTree const& tree) {
if (XGBOOST_EXPECT(!observe_, true)) { return; }
if (XGBOOST_EXPECT(!kObserve, true)) { return; }
OBSERVER_PRINT << "Tree:" << OBSERVER_ENDL;
Json j_tree {Object()};
tree.SaveModel(&j_tree);
@@ -58,7 +58,7 @@ class TrainingObserver {
}
/*\brief Observe tree. */
void Observe(RegTree const* p_tree) {
if (XGBOOST_EXPECT(!observe_, true)) { return; }
if (XGBOOST_EXPECT(!kObserve, true)) { return; }
auto const& tree = *p_tree;
this->Observe(tree);
}
@@ -66,7 +66,7 @@ class TrainingObserver {
template <typename T>
void Observe(std::vector<T> const& h_vec, std::string name,
size_t n = std::numeric_limits<std::size_t>::max()) const {
if (XGBOOST_EXPECT(!observe_, true)) { return; }
if (XGBOOST_EXPECT(!kObserve, true)) { return; }
OBSERVER_PRINT << "Procedure: " << name << OBSERVER_ENDL;
for (size_t i = 0; i < h_vec.size(); ++i) {
@@ -84,14 +84,14 @@ class TrainingObserver {
template <typename T>
void Observe(HostDeviceVector<T> const& vec, std::string name,
size_t n = std::numeric_limits<std::size_t>::max()) const {
if (XGBOOST_EXPECT(!observe_, true)) { return; }
if (XGBOOST_EXPECT(!kObserve, true)) { return; }
auto const& h_vec = vec.HostVector();
this->Observe(h_vec, name, n);
}
template <typename T>
void Observe(HostDeviceVector<T>* vec, std::string name,
size_t n = std::numeric_limits<std::size_t>::max()) const {
if (XGBOOST_EXPECT(!observe_, true)) { return; }
if (XGBOOST_EXPECT(!kObserve, true)) { return; }
this->Observe(*vec, name, n);
}
@@ -100,14 +100,14 @@ class TrainingObserver {
typename std::enable_if<
std::is_base_of<XGBoostParameter<Parameter>, Parameter>::value>::type* = nullptr>
void Observe(const Parameter &p, std::string name) const {
if (XGBOOST_EXPECT(!observe_, true)) { return; }
if (XGBOOST_EXPECT(!kObserve, true)) { return; }
Json obj {toJson(p)};
OBSERVER_PRINT << "Parameter: " << name << ":\n" << obj << OBSERVER_ENDL;
}
/*\brief Observe parameters provided by users. */
void Observe(Args const& args) const {
if (XGBOOST_EXPECT(!observe_, true)) { return; }
if (XGBOOST_EXPECT(!kObserve, true)) { return; }
for (auto kv : args) {
OBSERVER_PRINT << kv.first << ": " << kv.second << OBSERVER_NEWLINE;

View File

@@ -59,6 +59,7 @@ class ProbabilityDistribution {
* \return Reference to the newly created probability distribution object
*/
static ProbabilityDistribution* Create(ProbabilityDistributionType dist);
virtual ~ProbabilityDistribution() = default;
};
/*! \brief The (standard) normal distribution */

View File

@@ -89,6 +89,8 @@ class RowSetCollection {
const size_t* end = dmlc::BeginPtr(row_indices_) + row_indices_.size();
elem_of_each_node_.emplace_back(Elem(begin, end, 0));
}
std::vector<size_t>* Data() { return &row_indices_; }
// split rowset into two
inline void AddSplit(unsigned node_id,
unsigned left_node_id,
@@ -116,10 +118,9 @@ class RowSetCollection {
elem_of_each_node_[node_id] = Elem(nullptr, nullptr, -1);
}
private:
// stores the row indexes in the set
std::vector<size_t> row_indices_;
private:
// vector: node_id -> elements
std::vector<Elem> elem_of_each_node_;
};
@@ -151,12 +152,12 @@ class PartitionBuilder {
common::Span<size_t> GetLeftBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx).left(), end - begin };
return { mem_blocks_.at(task_idx).Left(), end - begin };
}
common::Span<size_t> GetRightBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx).right(), end - begin };
return { mem_blocks_.at(task_idx).Right(), end - begin };
}
void SetNLeftElems(int nid, size_t begin, size_t end, size_t n_left) {
@@ -202,8 +203,8 @@ class PartitionBuilder {
size_t* left_result = rows_indexes + mem_blocks_[task_idx].n_offset_left;
size_t* right_result = rows_indexes + mem_blocks_[task_idx].n_offset_right;
const size_t* left = mem_blocks_[task_idx].left();
const size_t* right = mem_blocks_[task_idx].right();
const size_t* left = mem_blocks_[task_idx].Left();
const size_t* right = mem_blocks_[task_idx].Right();
std::copy_n(left, mem_blocks_[task_idx].n_left, left_result);
std::copy_n(right, mem_blocks_[task_idx].n_right, right_result);
@@ -221,11 +222,11 @@ class PartitionBuilder {
size_t n_offset_left;
size_t n_offset_right;
size_t* left() {
size_t* Left() {
return &left_data_[0];
}
size_t* right() {
size_t* Right() {
return &right_data_[0];
}
private:

View File

@@ -15,13 +15,13 @@ namespace common {
void Monitor::Start(std::string const &name) {
if (ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {
statistics_map[name].timer.Start();
statistics_map_[name].timer.Start();
}
}
void Monitor::Stop(const std::string &name) {
if (ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {
auto &stats = statistics_map[name];
auto &stats = statistics_map_[name];
stats.timer.Stop();
stats.count++;
}
@@ -40,7 +40,7 @@ std::vector<Monitor::StatMap> Monitor::CollectFromOtherRanks() const {
j_statistic["statistic"] = Object();
auto& statistic = j_statistic["statistic"];
for (auto const& kv : statistics_map) {
for (auto const& kv : statistics_map_) {
statistic[kv.first] = Object();
auto& j_pair = statistic[kv.first];
j_pair["count"] = Integer(kv.second.count);
@@ -105,7 +105,7 @@ void Monitor::Print() const {
auto world = this->CollectFromOtherRanks();
// rank zero is in charge of printing
if (rabit::GetRank() == 0) {
LOG(CONSOLE) << "======== Monitor: " << label << " ========";
LOG(CONSOLE) << "======== Monitor: " << label_ << " ========";
for (size_t i = 0; i < world.size(); ++i) {
LOG(CONSOLE) << "From rank: " << i << ": " << std::endl;
auto const& statistic = world[i];
@@ -114,12 +114,12 @@ void Monitor::Print() const {
}
} else {
StatMap stat_map;
for (auto const& kv : statistics_map) {
for (auto const& kv : statistics_map_) {
stat_map[kv.first] = std::make_pair(
kv.second.count, std::chrono::duration_cast<std::chrono::microseconds>(
kv.second.timer.elapsed).count());
}
LOG(CONSOLE) << "======== Monitor: " << label << " ========";
LOG(CONSOLE) << "======== Monitor: " << label_ << " ========";
this->PrintStatistics(stat_map);
}
}

View File

@@ -16,7 +16,7 @@ namespace common {
void Monitor::StartCuda(const std::string& name) {
if (ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {
auto &stats = statistics_map[name];
auto &stats = statistics_map_[name];
stats.timer.Start();
#if defined(XGBOOST_USE_NVTX)
stats.nvtx_id = nvtxRangeStartA(name.c_str());
@@ -26,7 +26,7 @@ void Monitor::StartCuda(const std::string& name) {
void Monitor::StopCuda(const std::string& name) {
if (ConsoleLogger::ShouldLog(ConsoleLogger::LV::kDebug)) {
auto &stats = statistics_map[name];
auto &stats = statistics_map_[name];
stats.timer.Stop();
stats.count++;
#if defined(XGBOOST_USE_NVTX)

View File

@@ -55,16 +55,16 @@ struct Monitor {
// from left to right, <name <count, elapsed>>
using StatMap = std::map<std::string, std::pair<size_t, size_t>>;
std::string label = "";
std::map<std::string, Statistics> statistics_map;
Timer self_timer;
std::string label_ = "";
std::map<std::string, Statistics> statistics_map_;
Timer self_timer_;
/*! \brief Collect time statistics across all workers. */
std::vector<StatMap> CollectFromOtherRanks() const;
void PrintStatistics(StatMap const& statistics) const;
public:
Monitor() { self_timer.Start(); }
Monitor() { self_timer_.Start(); }
/*\brief Print statistics info during destruction.
*
* Please note that this may not work, as with distributed frameworks like Dask, the
@@ -73,13 +73,13 @@ struct Monitor {
*/
~Monitor() {
this->Print();
self_timer.Stop();
self_timer_.Stop();
}
/*! \brief Print all the statistics. */
void Print() const;
void Init(std::string label) { this->label = label; }
void Init(std::string label) { this->label_ = label; }
void Start(const std::string &name);
void Stop(const std::string &name);
void StartCuda(const std::string &name);

View File

@@ -133,8 +133,9 @@ class Transform {
template <typename std::enable_if<CompiledWithCuda>::type* = nullptr,
typename... HDV>
void LaunchCUDA(Functor _func, HDV*... _vectors) const {
if (shard_)
if (shard_) {
UnpackShard(device_, _vectors...);
}
size_t range_size = *range_.end() - *range_.begin();