Fix external memory for get column batches. (#4622)
* Fix external memory for get column batches.

  This fixes two bugs:
  * Use PushCSC when getting column batches.
  * Don't remove the created temporary directory before the test finishes.

* Check all pages.
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <dmlc/filesystem.h>
|
||||
#include <vector>
|
||||
|
||||
#include "xgboost/data.h"
|
||||
@@ -55,8 +56,11 @@ TEST(SparsePage, PushCSC) {
|
||||
}
|
||||
|
||||
TEST(SparsePage, PushCSCAfterTranspose) {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string filename = tmpdir.path + "/big.libsvm";
|
||||
const int n_entries = 9;
|
||||
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(n_entries, 64UL);
|
||||
std::unique_ptr<DMatrix> dmat =
|
||||
CreateSparsePageDMatrix(n_entries, 64UL, filename);
|
||||
const int ncols = dmat->Info().num_col_;
|
||||
SparsePage page; // Consolidated sparse page
|
||||
for (const auto &batch : dmat->GetRowBatches()) {
|
||||
@@ -70,7 +74,7 @@ TEST(SparsePage, PushCSCAfterTranspose) {
|
||||
|
||||
// The feature value for a feature in each row should be identical, as that is
|
||||
// how the dmatrix has been created
|
||||
for (int i = 0; i < page.Size(); ++i) {
|
||||
for (size_t i = 0; i < page.Size(); ++i) {
|
||||
auto inst = page[i];
|
||||
for (int j = 1; j < inst.size(); ++j) {
|
||||
ASSERT_EQ(inst[0].fvalue, inst[j].fvalue);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright by Contributors
|
||||
#include <dmlc/filesystem.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <dmlc/filesystem.h>
|
||||
#include <cinttypes>
|
||||
@@ -26,7 +27,10 @@ TEST(SparsePageDMatrix, MetaInfo) {
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, RowAccess) {
|
||||
std::unique_ptr<xgboost::DMatrix> dmat = xgboost::CreateSparsePageDMatrix(12, 64);
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string filename = tmpdir.path + "/big.libsvm";
|
||||
std::unique_ptr<xgboost::DMatrix> dmat =
|
||||
xgboost::CreateSparsePageDMatrix(12, 64, filename);
|
||||
|
||||
// Test the data read into the first row
|
||||
auto &batch = *dmat->GetRowBatches().begin();
|
||||
@@ -67,3 +71,19 @@ TEST(SparsePageDMatrix, ColAccess) {
|
||||
|
||||
delete dmat;
|
||||
}
|
||||
|
||||
// Multi-batches access
|
||||
// Walks every batch returned by GetColumnBatches() and checks that each page's
// Size() equals the column count reported by the matrix's MetaInfo.
TEST(SparsePageDMatrix, ColAccessBatches) {
  // The file name handed to CreateSparsePageDMatrix lives inside this
  // directory, so the directory object must outlive every use of `dmat`.
  dmlc::TemporaryDirectory tmpdir;
  std::string filename = tmpdir.path + "/big.libsvm";
  // Create multiple sparse pages
  std::unique_ptr<xgboost::DMatrix> dmat {
    xgboost::CreateSparsePageDMatrix(1024, 1024, filename)
  };

  // Restores the OpenMP thread count on every exit path. ASSERT_EQ returns
  // from the test body on failure, which would otherwise skip a trailing
  // restore call and leave the overridden setting in place for later tests.
  // Declared after `dmat` so the count is restored before the matrix and the
  // temporary directory are destroyed, matching the original ordering.
  struct ThreadCountGuard {
    int saved;
    ~ThreadCountGuard() { omp_set_num_threads(saved); }
  } restore_threads{omp_get_max_threads()};
  omp_set_num_threads(16);

  for (auto const& page : dmat->GetColumnBatches()) {
    ASSERT_EQ(dmat->Info().num_col_, page.Size());
  }
}
|
||||
|
||||
Reference in New Issue
Block a user