Multi-threaded XGDMatrixCreateFromMat for faster DMatrix creation (#2530)
* Multi-threaded XGDMatrixCreateFromMat for faster DMatrix creation from numpy arrays for python interface.
This commit is contained in:
committed by
Rory Mitchell
parent
56550ff3f1
commit
6b375f6ad8
@@ -290,6 +290,7 @@ XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr,
|
||||
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
|
||||
|
||||
API_BEGIN();
|
||||
// FIXME: User should be able to control number of threads
|
||||
const int nthread = omp_get_max_threads();
|
||||
data::SimpleCSRSource& mat = *source;
|
||||
common::ParallelGroupBuilder<RowBatch::Entry> builder(&mat.row_ptr_, &mat.row_data_);
|
||||
@@ -350,24 +351,159 @@ XGB_DLL int XGDMatrixCreateFromMat(const bst_float* data,
|
||||
|
||||
API_BEGIN();
|
||||
data::SimpleCSRSource& mat = *source;
|
||||
mat.row_ptr_.resize(1+nrow);
|
||||
bool nan_missing = common::CheckNAN(missing);
|
||||
mat.info.num_row = nrow;
|
||||
mat.info.num_col = ncol;
|
||||
const bst_float* data0 = data;
|
||||
|
||||
// count elements for sizing data
|
||||
data = data0;
|
||||
for (xgboost::bst_ulong i = 0; i < nrow; ++i, data += ncol) {
|
||||
xgboost::bst_ulong nelem = 0;
|
||||
for (xgboost::bst_ulong j = 0; j < ncol; ++j) {
|
||||
if (common::CheckNAN(data[j])) {
|
||||
CHECK(nan_missing)
|
||||
<< "There are NAN in the matrix, however, you did not set missing=NAN";
|
||||
<< "There are NAN in the matrix, however, you did not set missing=NAN";
|
||||
} else {
|
||||
if (nan_missing || data[j] != missing) {
|
||||
mat.row_data_.push_back(RowBatch::Entry(j, data[j]));
|
||||
++nelem;
|
||||
}
|
||||
}
|
||||
}
|
||||
mat.row_ptr_.push_back(mat.row_ptr_.back() + nelem);
|
||||
mat.row_ptr_[i+1] = mat.row_ptr_[i] + nelem;
|
||||
}
|
||||
mat.row_data_.resize(mat.row_data_.size() + mat.row_ptr_.back());
|
||||
|
||||
data = data0;
|
||||
for (xgboost::bst_ulong i = 0; i < nrow; ++i, data += ncol) {
|
||||
xgboost::bst_ulong matj = 0;
|
||||
for (xgboost::bst_ulong j = 0; j < ncol; ++j) {
|
||||
if (common::CheckNAN(data[j])) {
|
||||
} else {
|
||||
if (nan_missing || data[j] != missing) {
|
||||
mat.row_data_[mat.row_ptr_[i] + matj] = RowBatch::Entry(j, data[j]);
|
||||
++matj;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mat.info.num_nonzero = mat.row_data_.size();
|
||||
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source)));
|
||||
API_END();
|
||||
}
|
||||
|
||||
void prefixsum_inplace(size_t *x, size_t N) {
|
||||
size_t *suma;
|
||||
#pragma omp parallel
|
||||
{
|
||||
const int ithread = omp_get_thread_num();
|
||||
const int nthreads = omp_get_num_threads();
|
||||
#pragma omp single
|
||||
{
|
||||
suma = new size_t[nthreads+1];
|
||||
suma[0] = 0;
|
||||
}
|
||||
size_t sum = 0;
|
||||
#pragma omp for schedule(static)
|
||||
for (omp_ulong i = 0; i < N; i++) {
|
||||
sum += x[i];
|
||||
x[i] = sum;
|
||||
}
|
||||
suma[ithread+1] = sum;
|
||||
#pragma omp barrier
|
||||
size_t offset = 0;
|
||||
for (omp_ulong i = 0; i < (ithread+1); i++) {
|
||||
offset += suma[i];
|
||||
}
|
||||
#pragma omp for schedule(static)
|
||||
for (omp_ulong i = 0; i < N; i++) {
|
||||
x[i] += offset;
|
||||
}
|
||||
}
|
||||
delete[] suma;
|
||||
}
|
||||
|
||||
|
||||
XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data,
|
||||
xgboost::bst_ulong nrow,
|
||||
xgboost::bst_ulong ncol,
|
||||
bst_float missing,
|
||||
DMatrixHandle* out,
|
||||
int nthread) {
|
||||
// avoid openmp unless enough data to be worth it to avoid overhead costs
|
||||
if (nrow*ncol <= 10000*50) {
|
||||
return(XGDMatrixCreateFromMat(data, nrow, ncol, missing, out));
|
||||
}
|
||||
|
||||
API_BEGIN();
|
||||
const int nthreadmax = std::max(omp_get_num_procs() / 2 - 1, 1);
|
||||
// const int nthreadmax = omp_get_max_threads();
|
||||
if (nthread <= 0) nthread=nthreadmax;
|
||||
omp_set_num_threads(nthread);
|
||||
xgboost::bst_ulong nrow_reserve_per_thread = std::ceil(nrow/static_cast<double>(nthread));
|
||||
|
||||
std::unique_ptr<data::SimpleCSRSource> source(new data::SimpleCSRSource());
|
||||
data::SimpleCSRSource& mat = *source;
|
||||
mat.row_ptr_.resize(1+nrow);
|
||||
mat.info.num_row = nrow;
|
||||
mat.info.num_col = ncol;
|
||||
|
||||
// Check for errors in missing elements
|
||||
// Count elements per row (to avoid otherwise need to copy)
|
||||
bool nan_missing = common::CheckNAN(missing);
|
||||
int *badnan;
|
||||
badnan = new int[nthread];
|
||||
for (int i = 0; i < nthread; i++) {
|
||||
badnan[i] = 0;
|
||||
}
|
||||
|
||||
#pragma omp parallel num_threads(nthread)
|
||||
{
|
||||
int ithread = omp_get_thread_num();
|
||||
|
||||
// Count elements per row
|
||||
#pragma omp for schedule(static)
|
||||
for (omp_ulong i = 0; i < nrow; ++i) {
|
||||
xgboost::bst_ulong nelem = 0;
|
||||
for (xgboost::bst_ulong j = 0; j < ncol; ++j) {
|
||||
if (common::CheckNAN(data[ncol*i + j]) && !nan_missing) {
|
||||
badnan[ithread] = 1;
|
||||
} else if (common::CheckNAN(data[ncol * i + j])) {
|
||||
} else if (nan_missing || data[ncol * i + j] != missing) {
|
||||
++nelem;
|
||||
}
|
||||
}
|
||||
mat.row_ptr_[i+1] = nelem;
|
||||
}
|
||||
}
|
||||
// Inform about any NaNs and resize data matrix
|
||||
for (int i = 0; i < nthread; i++) {
|
||||
CHECK(!badnan[i]) << "There are NAN in the matrix, however, you did not set missing=NAN";
|
||||
}
|
||||
|
||||
// do cumulative sum (to avoid otherwise need to copy)
|
||||
prefixsum_inplace(&mat.row_ptr_[0], mat.row_ptr_.size());
|
||||
mat.row_data_.resize(mat.row_data_.size() + mat.row_ptr_.back());
|
||||
|
||||
// Fill data matrix (now that know size, no need for slow push_back())
|
||||
#pragma omp parallel num_threads(nthread)
|
||||
{
|
||||
#pragma omp for schedule(static)
|
||||
for (omp_ulong i = 0; i < nrow; ++i) {
|
||||
xgboost::bst_ulong matj = 0;
|
||||
for (xgboost::bst_ulong j = 0; j < ncol; ++j) {
|
||||
if (common::CheckNAN(data[ncol * i + j])) {
|
||||
} else if (nan_missing || data[ncol * i + j] != missing) {
|
||||
mat.row_data_[mat.row_ptr_[i] + matj] =
|
||||
RowBatch::Entry(j, data[ncol * i + j]);
|
||||
++matj;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mat.info.num_nonzero = mat.row_data_.size();
|
||||
*out = new std::shared_ptr<DMatrix>(DMatrix::Create(std::move(source)));
|
||||
API_END();
|
||||
|
||||
Reference in New Issue
Block a user