Fix external memory race in colmaker. (#4980)

* Move `GetColDensity` out of omp parallel block.
This commit is contained in:
Jiaming Yuan 2019-10-25 04:11:13 -04:00 committed by GitHub
parent 96cd7ec2bb
commit 6ec7e300bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 29 additions and 18 deletions

View File

@@ -10,6 +10,7 @@
#include <string> #include <string>
#include "xgboost/logging.h" #include "xgboost/logging.h"
#include "io.h"
namespace xgboost { namespace xgboost {
namespace common { namespace common {

View File

@@ -19,8 +19,8 @@ using MemoryFixSizeBuffer = rabit::utils::MemoryFixSizeBuffer;
using MemoryBufferStream = rabit::utils::MemoryBufferStream; using MemoryBufferStream = rabit::utils::MemoryBufferStream;
/*! /*!
* \brief Input stream that support additional PeekRead * \brief Input stream that support additional PeekRead operation,
* operation, besides read. * besides read.
*/ */
class PeekableInStream : public dmlc::Stream { class PeekableInStream : public dmlc::Stream {
public: public:

View File

@@ -121,7 +121,7 @@ void EllpackPageImpl::InitInfo(int device,
// Initialize the buffer to stored compressed features. // Initialize the buffer to stored compressed features.
void EllpackPageImpl::InitCompressedData(int device, size_t num_rows) { void EllpackPageImpl::InitCompressedData(int device, size_t num_rows) {
int num_symbols = matrix.info.n_bins + 1; size_t num_symbols = matrix.info.n_bins + 1;
// Required buffer size for storing data matrix in ELLPack format. // Required buffer size for storing data matrix in ELLPack format.
size_t compressed_size_bytes = common::CompressedBufferWriter::CalculateBufferSize( size_t compressed_size_bytes = common::CompressedBufferWriter::CalculateBufferSize(

View File

@@ -140,7 +140,7 @@ struct RowStateOnDevice {
// to begin processing on each device // to begin processing on each device
class DeviceHistogramBuilderState { class DeviceHistogramBuilderState {
public: public:
explicit DeviceHistogramBuilderState(int n_rows) : device_row_state_(n_rows) {} explicit DeviceHistogramBuilderState(size_t n_rows) : device_row_state_(n_rows) {}
const RowStateOnDevice& GetRowStateOnDevice() const { const RowStateOnDevice& GetRowStateOnDevice() const {
return device_row_state_; return device_row_state_;

View File

@@ -41,6 +41,7 @@ class SparseBatchIteratorImpl : public BatchIteratorImpl<T> {
BatchSet<SparsePage> SparsePageDMatrix::GetRowBatches() { BatchSet<SparsePage> SparsePageDMatrix::GetRowBatches() {
auto cast = dynamic_cast<SparsePageSource<SparsePage>*>(row_source_.get()); auto cast = dynamic_cast<SparsePageSource<SparsePage>*>(row_source_.get());
CHECK(cast);
cast->BeforeFirst(); cast->BeforeFirst();
cast->Next(); cast->Next();
auto begin_iter = BatchIterator<SparsePage>( auto begin_iter = BatchIterator<SparsePage>(

View File

@@ -1,5 +1,5 @@
/*! /*!
* Copyright (c) 2014 by Contributors * Copyright (c) 2014-2019 by Contributors
* \file page_csr_source.h * \file page_csr_source.h
* External memory data source, saved with sparse_batch_page binary format. * External memory data source, saved with sparse_batch_page binary format.
* \author Tianqi Chen * \author Tianqi Chen
@@ -7,8 +7,6 @@
#ifndef XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_ #ifndef XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_
#define XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_ #define XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_
#include <xgboost/base.h>
#include <xgboost/data.h>
#include <dmlc/threadediter.h> #include <dmlc/threadediter.h>
#include <dmlc/timer.h> #include <dmlc/timer.h>
@@ -20,6 +18,9 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "sparse_page_writer.h" #include "sparse_page_writer.h"
#include "../common/common.h" #include "../common/common.h"

View File

@@ -1,5 +1,5 @@
/*! /*!
* Copyright (c) 2014 by Contributors * Copyright (c) 2014-2019 by Contributors
* \file sparse_page_writer.h * \file sparse_page_writer.h
* \author Tianqi Chen * \author Tianqi Chen
*/ */

View File

@@ -34,7 +34,7 @@ DMLC_REGISTRY_FILE_TAG(rank_obj_gpu);
#endif // defined(XGBOOST_USE_CUDA) #endif // defined(XGBOOST_USE_CUDA)
struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> { struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
int num_pairsample; size_t num_pairsample;
float fix_list_weight; float fix_list_weight;
// declare parameters // declare parameters
DMLC_DECLARE_PARAMETER(LambdaRankParam) { DMLC_DECLARE_PARAMETER(LambdaRankParam) {
@@ -337,7 +337,7 @@ class SortedLabelList {
// Launch a kernel that populates the segment information for the different groups // Launch a kernel that populates the segment information for the different groups
uint32_t *gsegs = group_segments_.data().get(); uint32_t *gsegs = group_segments_.data().get();
const unsigned *dgroups = dgroups_.data().get(); const unsigned *dgroups = dgroups_.data().get();
int ngroups = dgroups_.size(); size_t ngroups = dgroups_.size();
dh::LaunchN(device_id_, num_elems, nullptr, [=] __device__(unsigned idx){ dh::LaunchN(device_id_, num_elems, nullptr, [=] __device__(unsigned idx){
// Find the group first // Find the group first
int group_idx = dh::UpperBound(dgroups, ngroups, idx); int group_idx = dh::UpperBound(dgroups, ngroups, idx);
@@ -405,10 +405,10 @@
float weight_normalization_factor) { float weight_normalization_factor) {
// Group info on device // Group info on device
const unsigned *dgroups = dgroups_.data().get(); const unsigned *dgroups = dgroups_.data().get();
int ngroups = dgroups_.size(); size_t ngroups = dgroups_.size();
uint32_t total_items = group_segments_.size(); auto total_items = group_segments_.size();
int niter = param_.num_pairsample * total_items; size_t niter = param_.num_pairsample * total_items;
float fix_list_weight = param_.fix_list_weight; float fix_list_weight = param_.fix_list_weight;

View File

@@ -339,7 +339,7 @@ class ColMaker: public TreeUpdater {
} }
} }
// rescan, generate candidate split // rescan, generate candidate split
#pragma omp parallel #pragma omp parallel
{ {
GradStats c, cright; GradStats c, cright;
const int tid = omp_get_thread_num(); const int tid = omp_get_thread_num();
@@ -608,17 +608,25 @@ class ColMaker: public TreeUpdater {
poption = static_cast<int>(num_features) * 2 < this->nthread_ ? 1 : 0; poption = static_cast<int>(num_features) * 2 < this->nthread_ ? 1 : 0;
} }
if (poption == 0) { if (poption == 0) {
std::vector<float> densities(num_features);
CHECK_EQ(feat_set.size(), num_features);
for (bst_omp_uint i = 0; i < num_features; ++i) {
int32_t const fid = feat_set[i];
densities.at(i) = p_fmat->GetColDensity(fid);
}
#pragma omp parallel for schedule(dynamic, batch_size) #pragma omp parallel for schedule(dynamic, batch_size)
for (bst_omp_uint i = 0; i < num_features; ++i) { for (bst_omp_uint i = 0; i < num_features; ++i) {
int fid = feat_set[i]; int32_t const fid = feat_set[i];
const int tid = omp_get_thread_num(); int32_t const tid = omp_get_thread_num();
auto c = batch[fid]; auto c = batch[fid];
const bool ind = c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue; const bool ind = c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue;
if (param_.NeedForwardSearch(p_fmat->GetColDensity(fid), ind)) { auto const density = densities[i];
if (param_.NeedForwardSearch(density, ind)) {
this->EnumerateSplit(c.data(), c.data() + c.size(), +1, this->EnumerateSplit(c.data(), c.data() + c.size(), +1,
fid, gpair, info, stemp_[tid]); fid, gpair, info, stemp_[tid]);
} }
if (param_.NeedBackwardSearch(p_fmat->GetColDensity(fid), ind)) { if (param_.NeedBackwardSearch(density, ind)) {
this->EnumerateSplit(c.data() + c.size() - 1, c.data() - 1, -1, this->EnumerateSplit(c.data() + c.size() - 1, c.data() - 1, -1,
fid, gpair, info, stemp_[tid]); fid, gpair, info, stemp_[tid]);
} }