Fix external memory for get column batches. (#4622)

* Fix external memory for get column batches.

This fixes two bugs:

* Use PushCSC when getting column batches.
* Don't remove the created temporary directory before the test finishes.

* Check all pages.
This commit is contained in:
Jiaming Yuan
2019-06-30 09:56:49 +08:00
committed by GitHub
parent a30176907f
commit 45876bf41b
14 changed files with 90 additions and 46 deletions

View File

@@ -1,4 +1,5 @@
#include <gtest/gtest.h>
#include <dmlc/filesystem.h>
#include <vector>
#include "xgboost/data.h"
@@ -55,8 +56,11 @@ TEST(SparsePage, PushCSC) {
}
TEST(SparsePage, PushCSCAfterTranspose) {
dmlc::TemporaryDirectory tmpdir;
std::string filename = tmpdir.path + "/big.libsvm";
const int n_entries = 9;
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(n_entries, 64UL);
std::unique_ptr<DMatrix> dmat =
CreateSparsePageDMatrix(n_entries, 64UL, filename);
const int ncols = dmat->Info().num_col_;
SparsePage page; // Consolidated sparse page
for (const auto &batch : dmat->GetRowBatches()) {
@@ -70,7 +74,7 @@ TEST(SparsePage, PushCSCAfterTranspose) {
// The feature value for a feature in each row should be identical, as that is
// how the dmatrix has been created
for (int i = 0; i < page.Size(); ++i) {
for (size_t i = 0; i < page.Size(); ++i) {
auto inst = page[i];
for (int j = 1; j < inst.size(); ++j) {
ASSERT_EQ(inst[0].fvalue, inst[j].fvalue);