Fix clang-tidy warnings. (#4149)
* Upgrade gtest for clang-tidy. * Use CMake to install GTest instead of mv. * Don't enforce clang-tidy to return 0 due to errors in thrust. * Add a small test for tidy itself. * Reformat.
This commit is contained in:
@@ -24,13 +24,14 @@ namespace common {
|
||||
|
||||
using WXQSketch = HistCutMatrix::WXQSketch;
|
||||
|
||||
__global__ void find_cuts_k
|
||||
__global__ void FindCutsK
|
||||
(WXQSketch::Entry* __restrict__ cuts, const bst_float* __restrict__ data,
|
||||
const float* __restrict__ cum_weights, int nsamples, int ncuts) {
|
||||
// ncuts < nsamples
|
||||
int icut = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if (icut >= ncuts)
|
||||
if (icut >= ncuts) {
|
||||
return;
|
||||
}
|
||||
WXQSketch::Entry v;
|
||||
int isample = 0;
|
||||
if (icut == 0) {
|
||||
@@ -55,7 +56,7 @@ struct IsNotNaN {
|
||||
__device__ bool operator()(float a) const { return !isnan(a); }
|
||||
};
|
||||
|
||||
__global__ void unpack_features_k
|
||||
__global__ void UnpackFeaturesK
|
||||
(float* __restrict__ fvalues, float* __restrict__ feature_weights,
|
||||
const size_t* __restrict__ row_ptrs, const float* __restrict__ weights,
|
||||
Entry* entries, size_t nrows_array, int ncols, size_t row_begin_ptr,
|
||||
@@ -75,7 +76,7 @@ __global__ void unpack_features_k
|
||||
// if and only if it is also written to features
|
||||
if (!isnan(entry.fvalue) && (weights == nullptr || !isnan(weights[irow]))) {
|
||||
fvalues[ind] = entry.fvalue;
|
||||
if (feature_weights != nullptr) {
|
||||
if (feature_weights != nullptr && weights != nullptr) {
|
||||
feature_weights[ind] = weights[irow];
|
||||
}
|
||||
}
|
||||
@@ -84,7 +85,7 @@ __global__ void unpack_features_k
|
||||
// finds quantiles on the GPU
|
||||
struct GPUSketcher {
|
||||
// manage memory for a single GPU
|
||||
struct DeviceShard {
|
||||
class DeviceShard {
|
||||
int device_;
|
||||
bst_uint row_begin_; // The row offset for this shard
|
||||
bst_uint row_end_;
|
||||
@@ -110,6 +111,7 @@ struct GPUSketcher {
|
||||
thrust::device_vector<size_t> num_elements_;
|
||||
thrust::device_vector<char> tmp_storage_;
|
||||
|
||||
public:
|
||||
DeviceShard(int device, bst_uint row_begin, bst_uint row_end,
|
||||
tree::TrainParam param) :
|
||||
device_(device), row_begin_(row_begin), row_end_(row_end),
|
||||
@@ -268,7 +270,7 @@ struct GPUSketcher {
|
||||
} else if (n_cuts_cur_[icol] > 0) {
|
||||
// if more elements than cuts: use binary search on cumulative weights
|
||||
int block = 256;
|
||||
find_cuts_k<<<dh::DivRoundUp(n_cuts_cur_[icol], block), block>>>
|
||||
FindCutsK<<<dh::DivRoundUp(n_cuts_cur_[icol], block), block>>>
|
||||
(cuts_d_.data().get() + icol * n_cuts_, fvalues_cur_.data().get(),
|
||||
weights2_.data().get(), n_unique, n_cuts_cur_[icol]);
|
||||
dh::safe_cuda(cudaGetLastError()); // NOLINT
|
||||
@@ -309,7 +311,7 @@ struct GPUSketcher {
|
||||
dim3 block3(64, 4, 1);
|
||||
dim3 grid3(dh::DivRoundUp(batch_nrows, block3.x),
|
||||
dh::DivRoundUp(num_cols_, block3.y), 1);
|
||||
unpack_features_k<<<grid3, block3>>>
|
||||
UnpackFeaturesK<<<grid3, block3>>>
|
||||
(fvalues_.data().get(), has_weights_ ? feature_weights_.data().get() : nullptr,
|
||||
row_ptrs_.data().get() + batch_row_begin,
|
||||
has_weights_ ? weights_.data().get() : nullptr, entries_.data().get(),
|
||||
@@ -340,6 +342,10 @@ struct GPUSketcher {
|
||||
SketchBatch(row_batch, info, gpu_batch);
|
||||
}
|
||||
}
|
||||
|
||||
void GetSummary(WXQSketch::SummaryContainer *summary, size_t const icol) {
|
||||
sketches_[icol].GetSummary(summary);
|
||||
}
|
||||
};
|
||||
|
||||
void Sketch(const SparsePage& batch, const MetaInfo& info,
|
||||
@@ -368,8 +374,8 @@ struct GPUSketcher {
|
||||
WXQSketch::SummaryContainer summary;
|
||||
for (int icol = 0; icol < num_cols; ++icol) {
|
||||
sketches[icol].Init(batch.Size(), 1.0 / (8 * param_.max_bin));
|
||||
for (int shard = 0; shard < shards_.size(); ++shard) {
|
||||
shards_[shard]->sketches_[icol].GetSummary(&summary);
|
||||
for (auto &shard : shards_) {
|
||||
shard->GetSummary(&summary, icol);
|
||||
sketches[icol].PushSummary(summary);
|
||||
}
|
||||
}
|
||||
@@ -381,6 +387,7 @@ struct GPUSketcher {
|
||||
dist_ = GPUDistribution::Block(GPUSet::All(param_.gpu_id, param_.n_gpus, n_rows));
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<DeviceShard>> shards_;
|
||||
tree::TrainParam param_;
|
||||
GPUDistribution dist_;
|
||||
|
||||
Reference in New Issue
Block a user