Fix clang-tidy warnings. (#4149)
* Upgrade gtest for clang-tidy. * Use CMake to install GTest instead of mv. * Don't enforce clang-tidy to return 0 due to errors in thrust. * Add a small test for tidy itself. * Reformat.
This commit is contained in:
@@ -42,7 +42,9 @@ class Column {
|
||||
uint32_t GetBaseIdx() const { return index_base_; }
|
||||
ColumnType GetType() const { return type_; }
|
||||
size_t GetRowIdx(size_t idx) const {
|
||||
return type_ == ColumnType::kDenseColumn ? idx : row_ind_[idx];
|
||||
// clang-tidy worries that row_ind_ might be a nullptr, which is possible,
|
||||
// but low level structure is not safe anyway.
|
||||
return type_ == ColumnType::kDenseColumn ? idx : row_ind_[idx]; // NOLINT
|
||||
}
|
||||
bool IsMissing(size_t idx) const {
|
||||
return index_[idx] == std::numeric_limits<uint32_t>::max();
|
||||
@@ -68,7 +70,7 @@ class ColumnMatrix {
|
||||
|
||||
// construct column matrix from GHistIndexMatrix
|
||||
inline void Init(const GHistIndexMatrix& gmat,
|
||||
double sparse_threshold) {
|
||||
double sparse_threshold) {
|
||||
const auto nfeature = static_cast<bst_uint>(gmat.cut.row_ptr.size() - 1);
|
||||
const size_t nrow = gmat.row_ptr.size() - 1;
|
||||
|
||||
|
||||
@@ -772,7 +772,7 @@ template <typename T>
|
||||
typename std::iterator_traits<T>::value_type SumReduction(
|
||||
dh::CubMemory &tmp_mem, T in, int nVals) {
|
||||
using ValueT = typename std::iterator_traits<T>::value_type;
|
||||
size_t tmpSize;
|
||||
size_t tmpSize {0};
|
||||
ValueT *dummy_out = nullptr;
|
||||
dh::safe_cuda(cub::DeviceReduce::Sum(nullptr, tmpSize, in, dummy_out, nVals));
|
||||
// Allocate small extra memory for the return value
|
||||
|
||||
@@ -548,7 +548,7 @@ void GHistBuilder::BuildBlockHist(const std::vector<GradientPair>& gpair,
|
||||
const size_t rest = nrows % kUnroll;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
|
||||
const auto nthread = static_cast<bst_omp_uint>(this->nthread_); // NOLINT
|
||||
#endif // defined(_OPENMP)
|
||||
tree::GradStats* p_hist = hist.data();
|
||||
|
||||
@@ -594,7 +594,7 @@ void GHistBuilder::SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow pa
|
||||
const uint32_t rest = nbins % kUnroll;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
const auto nthread = static_cast<bst_omp_uint>(this->nthread_);
|
||||
const auto nthread = static_cast<bst_omp_uint>(this->nthread_); // NOLINT
|
||||
#endif // defined(_OPENMP)
|
||||
tree::GradStats* p_self = self.data();
|
||||
tree::GradStats* p_sibling = sibling.data();
|
||||
|
||||
@@ -24,13 +24,14 @@ namespace common {
|
||||
|
||||
using WXQSketch = HistCutMatrix::WXQSketch;
|
||||
|
||||
__global__ void find_cuts_k
|
||||
__global__ void FindCutsK
|
||||
(WXQSketch::Entry* __restrict__ cuts, const bst_float* __restrict__ data,
|
||||
const float* __restrict__ cum_weights, int nsamples, int ncuts) {
|
||||
// ncuts < nsamples
|
||||
int icut = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if (icut >= ncuts)
|
||||
if (icut >= ncuts) {
|
||||
return;
|
||||
}
|
||||
WXQSketch::Entry v;
|
||||
int isample = 0;
|
||||
if (icut == 0) {
|
||||
@@ -55,7 +56,7 @@ struct IsNotNaN {
|
||||
__device__ bool operator()(float a) const { return !isnan(a); }
|
||||
};
|
||||
|
||||
__global__ void unpack_features_k
|
||||
__global__ void UnpackFeaturesK
|
||||
(float* __restrict__ fvalues, float* __restrict__ feature_weights,
|
||||
const size_t* __restrict__ row_ptrs, const float* __restrict__ weights,
|
||||
Entry* entries, size_t nrows_array, int ncols, size_t row_begin_ptr,
|
||||
@@ -75,7 +76,7 @@ __global__ void unpack_features_k
|
||||
// if and only if it is also written to features
|
||||
if (!isnan(entry.fvalue) && (weights == nullptr || !isnan(weights[irow]))) {
|
||||
fvalues[ind] = entry.fvalue;
|
||||
if (feature_weights != nullptr) {
|
||||
if (feature_weights != nullptr && weights != nullptr) {
|
||||
feature_weights[ind] = weights[irow];
|
||||
}
|
||||
}
|
||||
@@ -84,7 +85,7 @@ __global__ void unpack_features_k
|
||||
// finds quantiles on the GPU
|
||||
struct GPUSketcher {
|
||||
// manage memory for a single GPU
|
||||
struct DeviceShard {
|
||||
class DeviceShard {
|
||||
int device_;
|
||||
bst_uint row_begin_; // The row offset for this shard
|
||||
bst_uint row_end_;
|
||||
@@ -110,6 +111,7 @@ struct GPUSketcher {
|
||||
thrust::device_vector<size_t> num_elements_;
|
||||
thrust::device_vector<char> tmp_storage_;
|
||||
|
||||
public:
|
||||
DeviceShard(int device, bst_uint row_begin, bst_uint row_end,
|
||||
tree::TrainParam param) :
|
||||
device_(device), row_begin_(row_begin), row_end_(row_end),
|
||||
@@ -268,7 +270,7 @@ struct GPUSketcher {
|
||||
} else if (n_cuts_cur_[icol] > 0) {
|
||||
// if more elements than cuts: use binary search on cumulative weights
|
||||
int block = 256;
|
||||
find_cuts_k<<<dh::DivRoundUp(n_cuts_cur_[icol], block), block>>>
|
||||
FindCutsK<<<dh::DivRoundUp(n_cuts_cur_[icol], block), block>>>
|
||||
(cuts_d_.data().get() + icol * n_cuts_, fvalues_cur_.data().get(),
|
||||
weights2_.data().get(), n_unique, n_cuts_cur_[icol]);
|
||||
dh::safe_cuda(cudaGetLastError()); // NOLINT
|
||||
@@ -309,7 +311,7 @@ struct GPUSketcher {
|
||||
dim3 block3(64, 4, 1);
|
||||
dim3 grid3(dh::DivRoundUp(batch_nrows, block3.x),
|
||||
dh::DivRoundUp(num_cols_, block3.y), 1);
|
||||
unpack_features_k<<<grid3, block3>>>
|
||||
UnpackFeaturesK<<<grid3, block3>>>
|
||||
(fvalues_.data().get(), has_weights_ ? feature_weights_.data().get() : nullptr,
|
||||
row_ptrs_.data().get() + batch_row_begin,
|
||||
has_weights_ ? weights_.data().get() : nullptr, entries_.data().get(),
|
||||
@@ -340,6 +342,10 @@ struct GPUSketcher {
|
||||
SketchBatch(row_batch, info, gpu_batch);
|
||||
}
|
||||
}
|
||||
|
||||
void GetSummary(WXQSketch::SummaryContainer *summary, size_t const icol) {
|
||||
sketches_[icol].GetSummary(summary);
|
||||
}
|
||||
};
|
||||
|
||||
void Sketch(const SparsePage& batch, const MetaInfo& info,
|
||||
@@ -368,8 +374,8 @@ struct GPUSketcher {
|
||||
WXQSketch::SummaryContainer summary;
|
||||
for (int icol = 0; icol < num_cols; ++icol) {
|
||||
sketches[icol].Init(batch.Size(), 1.0 / (8 * param_.max_bin));
|
||||
for (int shard = 0; shard < shards_.size(); ++shard) {
|
||||
shards_[shard]->sketches_[icol].GetSummary(&summary);
|
||||
for (auto &shard : shards_) {
|
||||
shard->GetSummary(&summary, icol);
|
||||
sketches[icol].PushSummary(summary);
|
||||
}
|
||||
}
|
||||
@@ -381,6 +387,7 @@ struct GPUSketcher {
|
||||
dist_ = GPUDistribution::Block(GPUSet::All(param_.gpu_id, param_.n_gpus, n_rows));
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<DeviceShard>> shards_;
|
||||
tree::TrainParam param_;
|
||||
GPUDistribution dist_;
|
||||
|
||||
@@ -38,6 +38,7 @@ struct HistCutMatrix {
|
||||
void Init(std::vector<WXQSketch>* sketchs, uint32_t max_num_bins);
|
||||
|
||||
HistCutMatrix();
|
||||
size_t NumBins() const { return row_ptr.back(); }
|
||||
|
||||
protected:
|
||||
virtual size_t SearchGroupIndFromBaseRow(
|
||||
|
||||
@@ -18,6 +18,11 @@ struct HostDeviceVectorImpl {
|
||||
explicit HostDeviceVectorImpl(size_t size, T v) : data_h_(size, v), distribution_() {}
|
||||
HostDeviceVectorImpl(std::initializer_list<T> init) : data_h_(init), distribution_() {}
|
||||
explicit HostDeviceVectorImpl(std::vector<T> init) : data_h_(std::move(init)), distribution_() {}
|
||||
|
||||
std::vector<T>& Vec() { return data_h_; }
|
||||
GPUDistribution& Dist() { return distribution_; }
|
||||
|
||||
private:
|
||||
std::vector<T> data_h_;
|
||||
GPUDistribution distribution_;
|
||||
};
|
||||
@@ -64,14 +69,14 @@ HostDeviceVector<T>& HostDeviceVector<T>::operator=(const HostDeviceVector<T>& o
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
size_t HostDeviceVector<T>::Size() const { return impl_->data_h_.size(); }
|
||||
size_t HostDeviceVector<T>::Size() const { return impl_->Vec().size(); }
|
||||
|
||||
template <typename T>
|
||||
GPUSet HostDeviceVector<T>::Devices() const { return GPUSet::Empty(); }
|
||||
|
||||
template <typename T>
|
||||
const GPUDistribution& HostDeviceVector<T>::Distribution() const {
|
||||
return impl_->distribution_;
|
||||
return impl_->Dist();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@@ -93,16 +98,16 @@ common::Span<const T> HostDeviceVector<T>::ConstDeviceSpan(int device) const {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->data_h_; }
|
||||
std::vector<T>& HostDeviceVector<T>::HostVector() { return impl_->Vec(); }
|
||||
|
||||
template <typename T>
|
||||
const std::vector<T>& HostDeviceVector<T>::ConstHostVector() const {
|
||||
return impl_->data_h_;
|
||||
return impl_->Vec();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void HostDeviceVector<T>::Resize(size_t new_size, T v) {
|
||||
impl_->data_h_.resize(new_size, v);
|
||||
impl_->Vec().resize(new_size, v);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
||||
@@ -23,10 +23,10 @@ void SetCudaSetDeviceHandler(void (*handler)(int)) {
|
||||
// wrapper over access with useful methods
|
||||
class Permissions {
|
||||
GPUAccess access_;
|
||||
explicit Permissions(GPUAccess access) : access_(access) {}
|
||||
explicit Permissions(GPUAccess access) : access_{access} {}
|
||||
|
||||
public:
|
||||
Permissions() : access_(GPUAccess::kNone) {}
|
||||
Permissions() : access_{GPUAccess::kNone} {}
|
||||
explicit Permissions(bool perm)
|
||||
: access_(perm ? GPUAccess::kWrite : GPUAccess::kNone) {}
|
||||
|
||||
@@ -46,8 +46,8 @@ template <typename T>
|
||||
struct HostDeviceVectorImpl {
|
||||
struct DeviceShard {
|
||||
DeviceShard()
|
||||
: proper_size_(0), device_(-1), start_(0), perm_d_(false),
|
||||
cached_size_(~0), vec_(nullptr) {}
|
||||
: proper_size_{0}, device_{-1}, start_{0}, perm_d_{false},
|
||||
cached_size_{static_cast<size_t>(~0)}, vec_{nullptr} {}
|
||||
|
||||
void Init(HostDeviceVectorImpl<T>* vec, int device) {
|
||||
if (vec_ == nullptr) { vec_ = vec; }
|
||||
@@ -154,6 +154,13 @@ struct HostDeviceVectorImpl {
|
||||
}
|
||||
}
|
||||
|
||||
T* Raw() { return data_.data().get(); }
|
||||
size_t Start() const { return start_; }
|
||||
size_t DataSize() const { return data_.size(); }
|
||||
Permissions& Perm() { return perm_d_; }
|
||||
Permissions const& Perm() const { return perm_d_; }
|
||||
|
||||
private:
|
||||
int device_;
|
||||
thrust::device_vector<T> data_;
|
||||
// cached vector size
|
||||
@@ -216,41 +223,42 @@ struct HostDeviceVectorImpl {
|
||||
T* DevicePointer(int device) {
|
||||
CHECK(distribution_.devices_.Contains(device));
|
||||
LazySyncDevice(device, GPUAccess::kWrite);
|
||||
return shards_.at(distribution_.devices_.Index(device)).data_.data().get();
|
||||
return shards_.at(distribution_.devices_.Index(device)).Raw();
|
||||
}
|
||||
|
||||
const T* ConstDevicePointer(int device) {
|
||||
CHECK(distribution_.devices_.Contains(device));
|
||||
LazySyncDevice(device, GPUAccess::kRead);
|
||||
return shards_.at(distribution_.devices_.Index(device)).data_.data().get();
|
||||
return shards_.at(distribution_.devices_.Index(device)).Raw();
|
||||
}
|
||||
|
||||
common::Span<T> DeviceSpan(int device) {
|
||||
GPUSet devices = distribution_.devices_;
|
||||
CHECK(devices.Contains(device));
|
||||
LazySyncDevice(device, GPUAccess::kWrite);
|
||||
return {shards_.at(devices.Index(device)).data_.data().get(),
|
||||
static_cast<typename common::Span<T>::index_type>(DeviceSize(device))};
|
||||
return {shards_.at(devices.Index(device)).Raw(),
|
||||
static_cast<typename common::Span<T>::index_type>(DeviceSize(device))};
|
||||
}
|
||||
|
||||
common::Span<const T> ConstDeviceSpan(int device) {
|
||||
GPUSet devices = distribution_.devices_;
|
||||
CHECK(devices.Contains(device));
|
||||
LazySyncDevice(device, GPUAccess::kRead);
|
||||
return {shards_.at(devices.Index(device)).data_.data().get(),
|
||||
static_cast<typename common::Span<const T>::index_type>(DeviceSize(device))};
|
||||
using SpanInd = typename common::Span<const T>::index_type;
|
||||
return {shards_.at(devices.Index(device)).Raw(),
|
||||
static_cast<SpanInd>(DeviceSize(device))};
|
||||
}
|
||||
|
||||
size_t DeviceSize(int device) {
|
||||
CHECK(distribution_.devices_.Contains(device));
|
||||
LazySyncDevice(device, GPUAccess::kRead);
|
||||
return shards_.at(distribution_.devices_.Index(device)).data_.size();
|
||||
return shards_.at(distribution_.devices_.Index(device)).DataSize();
|
||||
}
|
||||
|
||||
size_t DeviceStart(int device) {
|
||||
CHECK(distribution_.devices_.Contains(device));
|
||||
LazySyncDevice(device, GPUAccess::kRead);
|
||||
return shards_.at(distribution_.devices_.Index(device)).start_;
|
||||
return shards_.at(distribution_.devices_.Index(device)).Start();
|
||||
}
|
||||
|
||||
thrust::device_ptr<T> tbegin(int device) { // NOLINT
|
||||
@@ -293,7 +301,7 @@ struct HostDeviceVectorImpl {
|
||||
}
|
||||
}
|
||||
|
||||
void Fill(T v) {
|
||||
void Fill(T v) { // NOLINT
|
||||
if (perm_h_.CanWrite()) {
|
||||
std::fill(data_h_.begin(), data_h_.end(), v);
|
||||
} else {
|
||||
@@ -389,7 +397,7 @@ struct HostDeviceVectorImpl {
|
||||
if (perm_h_.CanRead()) {
|
||||
// data is present, just need to deny access to the device
|
||||
dh::ExecuteIndexShards(&shards_, [&](int idx, DeviceShard& shard) {
|
||||
shard.perm_d_.DenyComplementary(access);
|
||||
shard.Perm().DenyComplementary(access);
|
||||
});
|
||||
perm_h_.Grant(access);
|
||||
return;
|
||||
@@ -412,9 +420,10 @@ struct HostDeviceVectorImpl {
|
||||
bool DeviceCanAccess(int device, GPUAccess access) {
|
||||
GPUSet devices = distribution_.Devices();
|
||||
if (!devices.Contains(device)) { return false; }
|
||||
return shards_.at(devices.Index(device)).perm_d_.CanAccess(access);
|
||||
return shards_.at(devices.Index(device)).Perm().CanAccess(access);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<T> data_h_;
|
||||
Permissions perm_h_;
|
||||
// the total size of the data stored on the devices
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
#ifndef XGBOOST_COMMON_MATH_H_
|
||||
#define XGBOOST_COMMON_MATH_H_
|
||||
|
||||
#include <xgboost/base.h>
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
|
||||
@@ -622,8 +622,8 @@ XGBOOST_DEVICE auto as_writable_bytes(Span<T, E> s) __span_noexcept -> // NOLIN
|
||||
return {reinterpret_cast<byte*>(s.data()), s.size_bytes()};
|
||||
}
|
||||
|
||||
} // namespace common NOLINT
|
||||
} // namespace xgboost NOLINT
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
#if defined(_MSC_VER) &&_MSC_VER < 1910
|
||||
#undef constexpr
|
||||
|
||||
Reference in New Issue
Block a user