Fix #3708: Use dmlc::TemporaryDirectory to handle temporaries in a cross-platform way (#3783)

* Fix #3708: Use dmlc::TemporaryDirectory to handle temporaries in a cross-platform way

Also install git inside the NVIDIA GPU container

* Update dmlc-core
This commit is contained in:
Philip Hyunsu Cho
2018-10-18 10:16:04 -07:00
committed by GitHub
parent 55ee9a92a1
commit abf2f661be
9 changed files with 40 additions and 61 deletions

View File

@@ -1,5 +1,6 @@
// Copyright by Contributors
#include <dmlc/io.h>
#include <dmlc/filesystem.h>
#include <xgboost/data.h>
#include <string>
#include <memory>
@@ -48,8 +49,9 @@ TEST(MetaInfo, SaveLoadBinary) {
info.num_row_ = 2;
info.num_col_ = 1;
std::string tmp_file = TempFileName();
dmlc::Stream * fs = dmlc::Stream::Create(tmp_file.c_str(), "w");
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/metainfo.binary";
dmlc::Stream* fs = dmlc::Stream::Create(tmp_file.c_str(), "w");
info.SaveBinary(fs);
delete fs;
@@ -62,14 +64,12 @@ TEST(MetaInfo, SaveLoadBinary) {
EXPECT_EQ(inforead.labels_.HostVector(), info.labels_.HostVector());
EXPECT_EQ(inforead.num_col_, info.num_col_);
EXPECT_EQ(inforead.num_row_, info.num_row_);
std::remove(tmp_file.c_str());
delete fs;
}
TEST(MetaInfo, LoadQid) {
std::string tmp_file = TempFileName();
dmlc::TemporaryDirectory tempdir;
std::string tmp_file = tempdir.path + "/qid_test.libsvm";
{
std::unique_ptr<dmlc::Stream> fs(
dmlc::Stream::Create(tmp_file.c_str(), "w"));
@@ -90,7 +90,6 @@ TEST(MetaInfo, LoadQid) {
}
std::unique_ptr<xgboost::DMatrix> dmat(
xgboost::DMatrix::Load(tmp_file, true, false, "libsvm"));
std::remove(tmp_file.c_str());
const xgboost::MetaInfo& info = dmat->Info();
const std::vector<uint64_t> expected_qids{1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3};

View File

@@ -1,18 +1,19 @@
// Copyright by Contributors
#include <xgboost/data.h>
#include <dmlc/filesystem.h>
#include "../../../src/data/simple_csr_source.h"
#include "../helpers.h"
TEST(SimpleCSRSource, SaveLoadBinary) {
std::string tmp_file = CreateSimpleTestData();
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, true, false);
std::remove(tmp_file.c_str());
std::string tmp_binfile = TempFileName();
const std::string tmp_binfile = tempdir.path + "/csr_source.binary";
dmat->SaveToLocalFile(tmp_binfile);
xgboost::DMatrix * dmat_read = xgboost::DMatrix::Load(tmp_binfile, true, false);
std::remove(tmp_binfile.c_str());
EXPECT_EQ(dmat->Info().num_col_, dmat_read->Info().num_col_);
EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);

View File

@@ -1,13 +1,15 @@
// Copyright by Contributors
#include <xgboost/data.h>
#include <dmlc/filesystem.h>
#include "../../../src/data/simple_dmatrix.h"
#include "../helpers.h"
TEST(SimpleDMatrix, MetaInfo) {
std::string tmp_file = CreateSimpleTestData();
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, true, false);
std::remove(tmp_file.c_str());
// Test the metadata that was parsed
EXPECT_EQ(dmat->Info().num_row_, 2);
@@ -19,9 +21,10 @@ TEST(SimpleDMatrix, MetaInfo) {
}
TEST(SimpleDMatrix, RowAccess) {
std::string tmp_file = CreateSimpleTestData();
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, false, false);
std::remove(tmp_file.c_str());
// Loop over the batches and count the records
long row_count = 0;
@@ -40,9 +43,10 @@ TEST(SimpleDMatrix, RowAccess) {
}
TEST(SimpleDMatrix, ColAccessWithoutBatches) {
std::string tmp_file = CreateSimpleTestData();
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, true, false);
std::remove(tmp_file.c_str());
// Sorted column access
EXPECT_EQ(dmat->GetColDensity(0), 1);

View File

@@ -1,11 +1,14 @@
// Copyright by Contributors
#include <xgboost/data.h>
#include <dmlc/filesystem.h>
#include "../../../src/data/sparse_page_dmatrix.h"
#include "../helpers.h"
TEST(SparsePageDMatrix, MetaInfo) {
std::string tmp_file = CreateSimpleTestData();
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(
tmp_file + "#" + tmp_file + ".cache", false, false);
std::cout << tmp_file << std::endl;
@@ -17,20 +20,16 @@ TEST(SparsePageDMatrix, MetaInfo) {
EXPECT_EQ(dmat->Info().num_nonzero_, 6);
EXPECT_EQ(dmat->Info().labels_.Size(), dmat->Info().num_row_);
// Clean up of external memory files
std::remove(tmp_file.c_str());
std::remove((tmp_file + ".cache").c_str());
std::remove((tmp_file + ".cache.row.page").c_str());
delete dmat;
}
TEST(SparsePageDMatrix, RowAccess) {
// Create sufficiently large data to make two row pages
std::string tmp_file = CreateBigTestData(5000000);
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/big.libsvm";
CreateBigTestData(tmp_file, 5000000);
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(
tmp_file + "#" + tmp_file + ".cache", true, false);
std::remove(tmp_file.c_str());
EXPECT_TRUE(FileExists(tmp_file + ".cache.row.page"));
// Loop over the batches and count the records
@@ -47,18 +46,15 @@ TEST(SparsePageDMatrix, RowAccess) {
EXPECT_EQ(first_row[2].index, 2);
EXPECT_EQ(first_row[2].fvalue, 20);
// Clean up of external memory files
std::remove((tmp_file + ".cache").c_str());
std::remove((tmp_file + ".cache.row.page").c_str());
delete dmat;
}
TEST(SparsePageDMatrix, ColAccess) {
std::string tmp_file = CreateSimpleTestData();
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
CreateSimpleTestData(tmp_file);
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(
tmp_file + "#" + tmp_file + ".cache", true, false);
std::remove(tmp_file.c_str());
EXPECT_EQ(dmat->GetColDensity(0), 1);
EXPECT_EQ(dmat->GetColDensity(1), 0.5);
@@ -82,10 +78,5 @@ TEST(SparsePageDMatrix, ColAccess) {
EXPECT_TRUE(FileExists(tmp_file + ".cache.col.page"));
EXPECT_TRUE(FileExists(tmp_file + ".cache.sorted.col.page"));
std::remove((tmp_file + ".cache").c_str());
std::remove((tmp_file + ".cache.row.page").c_str());
std::remove((tmp_file + ".cache.col.page").c_str());
std::remove((tmp_file + ".cache.sorted.col.page").c_str());
delete dmat;
}