Fix external memory for get column batches. (#4622)

* Fix external memory for get column batches.

This fixes two bugs:

* Use PushCSC when getting column batches.
* Don't remove the created temporary directory before the test finishes.

* Check all pages.
This commit is contained in:
Jiaming Yuan
2019-06-30 09:56:49 +08:00
committed by GitHub
parent a30176907f
commit 45876bf41b
14 changed files with 90 additions and 46 deletions

View File

@@ -1,6 +1,9 @@
#include <dmlc/filesystem.h>
#include <gtest/gtest.h>
#include "../../../src/common/column_matrix.h"
#include "../helpers.h"
#include "gtest/gtest.h"
namespace xgboost {
namespace common {
@@ -51,10 +54,11 @@ TEST(DenseColumnWithMissing, Test) {
delete dmat;
}
void
TestGHistIndexMatrixCreation(size_t nthreads) {
void TestGHistIndexMatrixCreation(size_t nthreads) {
dmlc::TemporaryDirectory tmpdir;
std::string filename = tmpdir.path + "/big.libsvm";
/* This should create multiple sparse pages */
std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(1024, 1024);
std::unique_ptr<DMatrix> dmat{ CreateSparsePageDMatrix(1024, 1024, filename) };
omp_set_num_threads(nthreads);
GHistIndexMatrix gmat;
gmat.Init(dmat.get(), 256);

View File

@@ -1,7 +1,9 @@
#include <dmlc/filesystem.h>
#include <gtest/gtest.h>
#include <algorithm>
#include <cmath>
#include "gtest/gtest.h"
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
@@ -22,10 +24,12 @@ void TestDeviceSketch(const GPUSet& devices, bool use_external_memory) {
std::shared_ptr<xgboost::DMatrix> *dmat = nullptr;
size_t num_cols = 1;
dmlc::TemporaryDirectory tmpdir;
std::string file = tmpdir.path + "/big.libsvm";
if (use_external_memory) {
auto sp_dmat = CreateSparsePageDMatrix(nrows * 3, 128UL); // 3 entries/row
dmat = new std::shared_ptr<xgboost::DMatrix>(std::move(sp_dmat));
num_cols = 5;
auto sp_dmat = CreateSparsePageDMatrix(nrows * 3, 128UL, file); // 3 entries/row
dmat = new std::shared_ptr<xgboost::DMatrix>(std::move(sp_dmat));
num_cols = 5;
} else {
std::vector<float> test_data(nrows);
auto count_iter = thrust::make_counting_iterator(0);