simplify and parallelize data builder

This commit is contained in:
tqchen
2015-04-15 13:42:03 -07:00
parent 6370b38c14
commit 3d8431fc5c
5 changed files with 55 additions and 328 deletions

View File

@@ -19,7 +19,7 @@ using namespace std;
#include "../src/learner/learner-inl.hpp"
#include "../src/io/io.h"
#include "../src/utils/utils.h"
#include "../src/utils/matrix_csr.h"
#include "../src/utils/group_data.h"
#include "../src/io/simple_dmatrix-inl.hpp"
using namespace xgboost;
@@ -139,20 +139,32 @@ extern "C"{
const float *data,
bst_ulong nindptr,
bst_ulong nelem) {
int nthread;
#pragma omp parallel
{
nthread = omp_get_num_threads();
}
DMatrixSimple *p_mat = new DMatrixSimple();
DMatrixSimple &mat = *p_mat;
utils::SparseCSRMBuilder<RowBatch::Entry, false> builder(mat.row_ptr_, mat.row_data_);
builder.InitBudget();
utils::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
builder.InitBudget(0, nthread);
bst_ulong ncol = nindptr - 1;
#pragma omp parallel for schedule(static)
for (bst_ulong i = 0; i < ncol; ++i) {
int tid = omp_get_thread_num();
for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
builder.AddBudget(indices[j]);
builder.AddBudget(indices[j], tid);
}
}
builder.InitStorage();
#pragma omp parallel for schedule(static)
for (bst_ulong i = 0; i < ncol; ++i) {
int tid = omp_get_thread_num();
for (unsigned j = col_ptr[i]; j < col_ptr[i+1]; ++j) {
builder.PushElem(indices[j], RowBatch::Entry(static_cast<bst_uint>(i), data[j]));
builder.Push(indices[j],
RowBatch::Entry(static_cast<bst_uint>(i), data[j]),
tid);
}
}
mat.info.info.num_row = mat.row_ptr_.size() - 1;