Fix external memory race in colmaker. (#4980)

* Move `GetColDensity` out of omp parallel block.
This commit is contained in:
Jiaming Yuan 2019-10-25 04:11:13 -04:00 committed by GitHub
parent 96cd7ec2bb
commit 6ec7e300bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 29 additions and 18 deletions

View File

@ -10,6 +10,7 @@
#include <string>
#include "xgboost/logging.h"
#include "io.h"
namespace xgboost {
namespace common {

View File

@ -19,8 +19,8 @@ using MemoryFixSizeBuffer = rabit::utils::MemoryFixSizeBuffer;
using MemoryBufferStream = rabit::utils::MemoryBufferStream;
/*!
* \brief Input stream that support additional PeekRead
* operation, besides read.
* \brief Input stream that support additional PeekRead operation,
* besides read.
*/
class PeekableInStream : public dmlc::Stream {
public:

View File

@ -121,7 +121,7 @@ void EllpackPageImpl::InitInfo(int device,
// Initialize the buffer to stored compressed features.
void EllpackPageImpl::InitCompressedData(int device, size_t num_rows) {
int num_symbols = matrix.info.n_bins + 1;
size_t num_symbols = matrix.info.n_bins + 1;
// Required buffer size for storing data matrix in ELLPack format.
size_t compressed_size_bytes = common::CompressedBufferWriter::CalculateBufferSize(

View File

@ -140,7 +140,7 @@ struct RowStateOnDevice {
// to begin processing on each device
class DeviceHistogramBuilderState {
public:
explicit DeviceHistogramBuilderState(int n_rows) : device_row_state_(n_rows) {}
explicit DeviceHistogramBuilderState(size_t n_rows) : device_row_state_(n_rows) {}
const RowStateOnDevice& GetRowStateOnDevice() const {
return device_row_state_;

View File

@ -41,6 +41,7 @@ class SparseBatchIteratorImpl : public BatchIteratorImpl<T> {
BatchSet<SparsePage> SparsePageDMatrix::GetRowBatches() {
auto cast = dynamic_cast<SparsePageSource<SparsePage>*>(row_source_.get());
CHECK(cast);
cast->BeforeFirst();
cast->Next();
auto begin_iter = BatchIterator<SparsePage>(

View File

@ -1,5 +1,5 @@
/*!
* Copyright (c) 2014 by Contributors
* Copyright (c) 2014-2019 by Contributors
* \file page_csr_source.h
* External memory data source, saved with sparse_batch_page binary format.
* \author Tianqi Chen
@ -7,8 +7,6 @@
#ifndef XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_
#define XGBOOST_DATA_SPARSE_PAGE_SOURCE_H_
#include <xgboost/base.h>
#include <xgboost/data.h>
#include <dmlc/threadediter.h>
#include <dmlc/timer.h>
@ -20,6 +18,9 @@
#include <utility>
#include <vector>
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "sparse_page_writer.h"
#include "../common/common.h"

View File

@ -1,5 +1,5 @@
/*!
* Copyright (c) 2014 by Contributors
* Copyright (c) 2014-2019 by Contributors
* \file sparse_page_writer.h
* \author Tianqi Chen
*/

View File

@ -34,7 +34,7 @@ DMLC_REGISTRY_FILE_TAG(rank_obj_gpu);
#endif // defined(XGBOOST_USE_CUDA)
struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
int num_pairsample;
size_t num_pairsample;
float fix_list_weight;
// declare parameters
DMLC_DECLARE_PARAMETER(LambdaRankParam) {
@ -337,7 +337,7 @@ class SortedLabelList {
// Launch a kernel that populates the segment information for the different groups
uint32_t *gsegs = group_segments_.data().get();
const unsigned *dgroups = dgroups_.data().get();
int ngroups = dgroups_.size();
size_t ngroups = dgroups_.size();
dh::LaunchN(device_id_, num_elems, nullptr, [=] __device__(unsigned idx){
// Find the group first
int group_idx = dh::UpperBound(dgroups, ngroups, idx);
@ -405,10 +405,10 @@ class SortedLabelList {
float weight_normalization_factor) {
// Group info on device
const unsigned *dgroups = dgroups_.data().get();
int ngroups = dgroups_.size();
size_t ngroups = dgroups_.size();
uint32_t total_items = group_segments_.size();
int niter = param_.num_pairsample * total_items;
auto total_items = group_segments_.size();
size_t niter = param_.num_pairsample * total_items;
float fix_list_weight = param_.fix_list_weight;

View File

@ -608,17 +608,25 @@ class ColMaker: public TreeUpdater {
poption = static_cast<int>(num_features) * 2 < this->nthread_ ? 1 : 0;
}
if (poption == 0) {
std::vector<float> densities(num_features);
CHECK_EQ(feat_set.size(), num_features);
for (bst_omp_uint i = 0; i < num_features; ++i) {
int32_t const fid = feat_set[i];
densities.at(i) = p_fmat->GetColDensity(fid);
}
#pragma omp parallel for schedule(dynamic, batch_size)
for (bst_omp_uint i = 0; i < num_features; ++i) {
int fid = feat_set[i];
const int tid = omp_get_thread_num();
int32_t const fid = feat_set[i];
int32_t const tid = omp_get_thread_num();
auto c = batch[fid];
const bool ind = c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue;
if (param_.NeedForwardSearch(p_fmat->GetColDensity(fid), ind)) {
auto const density = densities[i];
if (param_.NeedForwardSearch(density, ind)) {
this->EnumerateSplit(c.data(), c.data() + c.size(), +1,
fid, gpair, info, stemp_[tid]);
}
if (param_.NeedBackwardSearch(p_fmat->GetColDensity(fid), ind)) {
if (param_.NeedBackwardSearch(density, ind)) {
this->EnumerateSplit(c.data() + c.size() - 1, c.data() - 1, -1,
fid, gpair, info, stemp_[tid]);
}