Optimize DMatrix build time. (#5877)

Co-authored-by: SHVETS, KIRILL <kirill.shvets@intel.com>
This commit is contained in:
ShvetsKS
2020-08-19 20:37:03 +03:00
committed by GitHub
parent 29b7fea572
commit 24f2e6c97e
3 changed files with 70 additions and 40 deletions

View File

@@ -840,10 +840,11 @@ uint64_t SparsePage::Push(const AdapterBatchT& batch, float missing, int nthread
// Set number of threads but keep old value so we can reset it after
const int nthreadmax = omp_get_max_threads();
if (nthread <= 0) nthread = nthreadmax;
int nthread_original = omp_get_max_threads();
const int nthread_original = omp_get_max_threads();
omp_set_num_threads(nthread);
auto& offset_vec = offset.HostVector();
auto& data_vec = data.HostVector();
size_t builder_base_row_offset = this->Size();
common::ParallelGroupBuilder<
Entry, std::remove_reference<decltype(offset_vec)>::type::value_type>
@@ -858,48 +859,74 @@ uint64_t SparsePage::Push(const AdapterBatchT& batch, float missing, int nthread
last_line.GetElement(last_line.Size() - 1).row_idx - base_rowid;
}
}
builder.InitBudget(expected_rows, nthread);
uint64_t max_columns = 0;
// First-pass over the batch counting valid elements
size_t batch_size = batch.Size();
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < static_cast<omp_ulong>(batch_size);
++i) { // NOLINT(*)
int tid = omp_get_thread_num();
auto line = batch.GetLine(i);
for (auto j = 0ull; j < line.Size(); j++) {
data::COOTuple element = line.GetElement(j);
max_columns =
std::max(max_columns, static_cast<uint64_t>(element.column_idx + 1));
if (!common::CheckNAN(element.value) && element.value != missing) {
size_t key = element.row_idx - base_rowid;
// Adapter row index is absolute, here we want it relative to
// current page
CHECK_GE(key, builder_base_row_offset);
builder.AddBudget(key, tid);
}
}
const size_t thread_size = batch_size/nthread;
builder.InitBudget(expected_rows+1, nthread);
uint64_t max_columns = 0;
if (batch_size == 0) {
omp_set_num_threads(nthread_original);
return max_columns;
}
std::vector<std::vector<uint64_t>> max_columns_vector(nthread);
dmlc::OMPException exec;
// First-pass over the batch counting valid elements
#pragma omp parallel num_threads(nthread)
{
exec.Run([&]() {
int tid = omp_get_thread_num();
size_t begin = tid*thread_size;
size_t end = tid != (nthread-1) ? (tid+1)*thread_size : batch_size;
max_columns_vector[tid].resize(1, 0);
uint64_t& max_columns_local = max_columns_vector[tid][0];
for (size_t i = begin; i < end; ++i) {
auto line = batch.GetLine(i);
for (auto j = 0ull; j < line.Size(); j++) {
auto element = line.GetElement(j);
const size_t key = element.row_idx - base_rowid;
CHECK_GE(key, builder_base_row_offset);
max_columns_local =
std::max(max_columns_local, static_cast<uint64_t>(element.column_idx + 1));
if (!common::CheckNAN(element.value) && element.value != missing) {
// Adapter row index is absolute, here we want it relative to
// current page
builder.AddBudget(key, tid);
}
}
}
});
}
exec.Rethrow();
for (const auto & max : max_columns_vector) {
max_columns = std::max(max_columns, max[0]);
}
builder.InitStorage();
// Second pass over batch, placing elements in correct position
#pragma omp parallel for schedule(static)
for (omp_ulong i = 0; i < static_cast<omp_ulong>(batch_size);
++i) { // NOLINT(*)
int tid = omp_get_thread_num();
auto line = batch.GetLine(i);
for (auto j = 0ull; j < line.Size(); j++) {
auto element = line.GetElement(j);
if (!common::CheckNAN(element.value) && element.value != missing) {
size_t key = element.row_idx -
base_rowid; // Adapter row index is absolute, here we want
// it relative to current page
builder.Push(key, Entry(element.column_idx, element.value), tid);
#pragma omp parallel num_threads(nthread)
{
exec.Run([&]() {
int tid = omp_get_thread_num();
size_t begin = tid*thread_size;
size_t end = tid != (nthread-1) ? (tid+1)*thread_size : batch_size;
for (size_t i = begin; i < end; ++i) {
auto line = batch.GetLine(i);
for (auto j = 0ull; j < line.Size(); j++) {
auto element = line.GetElement(j);
const size_t key = (element.row_idx - base_rowid);
if (!common::CheckNAN(element.value) && element.value != missing) {
builder.Push(key, Entry(element.column_idx, element.value), tid);
}
}
}
}
});
}
exec.Rethrow();
omp_set_num_threads(nthread_original);
return max_columns;
}