- fix issues with training with external memory on cpu (#4487)

* - fix issues with training with external memory on cpu
   - use the batch size to determine the correct number of rows in a batch
   - use the right number of threads in omp parallelization if the batch size
     is less than the default omp max threads (applicable for the last batch)

* - handle scenarios where last batch size is < available number of threads
- augment tests such that we can test all scenarios (batch size <, >, = number of threads)
This commit is contained in:
sriramch
2019-05-28 17:31:30 -07:00
committed by Rory Mitchell
parent 972f693eaf
commit a3fedbeaa8
2 changed files with 34 additions and 14 deletions

View File

@@ -50,5 +50,23 @@ TEST(DenseColumnWithMissing, Test) {
}
delete dmat;
}
// Builds a GHistIndexMatrix from a sparse-page DMatrix while OpenMP is
// configured to use `nthreads` threads.
//
// The helper makes no assertions of its own; it passes as long as
// GHistIndexMatrix::Init() completes without failing.
//
// \param nthreads OpenMP thread count in effect while the index is built.
void TestGHistIndexMatrixCreation(size_t nthreads) {
  // omp_set_num_threads() changes process-wide state. Save the previous value
  // and put it back on scope exit (including when Init() throws) so the thread
  // count chosen here does not leak into tests that run afterwards.
  struct OmpThreadsGuard {
    int saved;
    explicit OmpThreadsGuard(int n) : saved(omp_get_max_threads()) {
      omp_set_num_threads(n);
    }
    ~OmpThreadsGuard() { omp_set_num_threads(saved); }
    OmpThreadsGuard(const OmpThreadsGuard&) = delete;
    OmpThreadsGuard& operator=(const OmpThreadsGuard&) = delete;
  };

  /* This should create multiple sparse pages */
  std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(1024, 1024);
  // The thread count is switched only after the DMatrix exists, as before.
  // omp_set_num_threads() takes an int, so make the narrowing explicit.
  OmpThreadsGuard guard(static_cast<int>(nthreads));
  GHistIndexMatrix gmat;
  gmat.Init(dmat.get(), 256);
}
TEST(HistIndexCreationWithExternalMemory, Test) {
  // Build the histogram index under several thread-pool sizes so that the
  // final batch gets spread over the available threads correctly in each
  // configuration.
  constexpr size_t kThreadCounts[] = {20, 30, 40};
  for (const size_t thread_count : kThreadCounts) {
    TestGHistIndexMatrixCreation(thread_count);
  }
}
} // namespace common
} // namespace xgboost