From abf2f661bedb49245374c0c853231a5e91715b1c Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Thu, 18 Oct 2018 10:16:04 -0700 Subject: [PATCH] Fix #3708: Use dmlc::TemporaryDirectory to handle temporaries in cross-platform way (#3783) * Fix #3708: Use dmlc::TemporaryDirectory to handle temporaries in cross-platform way Also install git inside NVIDIA GPU container * Update dmlc-core --- dmlc-core | 2 +- tests/ci_build/Dockerfile.gpu | 2 +- tests/ci_build/Dockerfile.release | 2 +- tests/cpp/data/test_metainfo.cc | 13 +++++----- tests/cpp/data/test_simple_csr_source.cc | 9 ++++--- tests/cpp/data/test_simple_dmatrix.cc | 16 +++++++----- tests/cpp/data/test_sparse_page_dmatrix.cc | 29 ++++++++-------------- tests/cpp/helpers.cc | 22 +++------------- tests/cpp/helpers.h | 6 ++--- 9 files changed, 40 insertions(+), 61 deletions(-) diff --git a/dmlc-core b/dmlc-core index e3377de93..4d49691f1 160000 --- a/dmlc-core +++ b/dmlc-core @@ -1 +1 @@ -Subproject commit e3377de933d3e069635150fb330ce6cc33b57950 +Subproject commit 4d49691f1a9d944c3b0aa5e63f1db3cad1f941f8 diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu index adf7668a5..3805a66f8 100644 --- a/tests/ci_build/Dockerfile.gpu +++ b/tests/ci_build/Dockerfile.gpu @@ -7,7 +7,7 @@ ENV DEBIAN_FRONTEND noninteractive # Install all basic requirements RUN \ yum -y update && \ - yum install -y tar unzip wget xz && \ + yum install -y tar unzip wget xz git && \ wget http://people.centos.org/tru/devtools-2/devtools-2.repo -O /etc/yum.repos.d/devtools-2.repo && \ yum install -y devtoolset-2-gcc devtoolset-2-binutils devtoolset-2-gcc-c++ && \ # Python diff --git a/tests/ci_build/Dockerfile.release b/tests/ci_build/Dockerfile.release index 772e5f175..1d8228330 100644 --- a/tests/ci_build/Dockerfile.release +++ b/tests/ci_build/Dockerfile.release @@ -6,7 +6,7 @@ ENV DEBIAN_FRONTEND noninteractive # Install all basic requirements RUN \ yum -y update && \ - yum install -y graphviz tar unzip wget xz && \ + yum install -y graphviz tar unzip wget xz git && \ # Python wget https://repo.continuum.io/miniconda/Miniconda2-4.3.27-Linux-x86_64.sh && \ bash Miniconda2-4.3.27-Linux-x86_64.sh -b -p /opt/python diff --git a/tests/cpp/data/test_metainfo.cc b/tests/cpp/data/test_metainfo.cc index 9c13752fc..fe6bb2873 100644 --- a/tests/cpp/data/test_metainfo.cc +++ b/tests/cpp/data/test_metainfo.cc @@ -1,5 +1,6 @@ // Copyright by Contributors #include +#include #include #include #include @@ -48,8 +49,9 @@ TEST(MetaInfo, SaveLoadBinary) { info.num_row_ = 2; info.num_col_ = 1; - std::string tmp_file = TempFileName(); - dmlc::Stream * fs = dmlc::Stream::Create(tmp_file.c_str(), "w"); + dmlc::TemporaryDirectory tempdir; + const std::string tmp_file = tempdir.path + "/metainfo.binary"; + dmlc::Stream* fs = dmlc::Stream::Create(tmp_file.c_str(), "w"); info.SaveBinary(fs); delete fs; @@ -62,14 +64,12 @@ TEST(MetaInfo, SaveLoadBinary) { EXPECT_EQ(inforead.labels_.HostVector(), info.labels_.HostVector()); EXPECT_EQ(inforead.num_col_, info.num_col_); EXPECT_EQ(inforead.num_row_, info.num_row_); - - std::remove(tmp_file.c_str()); - delete fs; } TEST(MetaInfo, LoadQid) { - std::string tmp_file = TempFileName(); + dmlc::TemporaryDirectory tempdir; + std::string tmp_file = tempdir.path + "/qid_test.libsvm"; { std::unique_ptr fs( dmlc::Stream::Create(tmp_file.c_str(), "w")); @@ -90,7 +90,6 @@ TEST(MetaInfo, LoadQid) { } std::unique_ptr dmat( xgboost::DMatrix::Load(tmp_file, true, false, "libsvm")); - std::remove(tmp_file.c_str()); const xgboost::MetaInfo& info = dmat->Info(); const std::vector expected_qids{1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3}; diff --git a/tests/cpp/data/test_simple_csr_source.cc b/tests/cpp/data/test_simple_csr_source.cc index 71bd7716d..7798959dc 100644 --- a/tests/cpp/data/test_simple_csr_source.cc +++ b/tests/cpp/data/test_simple_csr_source.cc @@ -1,18 +1,19 @@ // Copyright by Contributors #include +#include #include "../../../src/data/simple_csr_source.h" #include "../helpers.h" TEST(SimpleCSRSource, SaveLoadBinary) { - std::string tmp_file = CreateSimpleTestData(); + dmlc::TemporaryDirectory tempdir; + const std::string tmp_file = tempdir.path + "/simple.libsvm"; + CreateSimpleTestData(tmp_file); xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, true, false); - std::remove(tmp_file.c_str()); - std::string tmp_binfile = TempFileName(); + const std::string tmp_binfile = tempdir.path + "/csr_source.binary"; dmat->SaveToLocalFile(tmp_binfile); xgboost::DMatrix * dmat_read = xgboost::DMatrix::Load(tmp_binfile, true, false); - std::remove(tmp_binfile.c_str()); EXPECT_EQ(dmat->Info().num_col_, dmat_read->Info().num_col_); EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_); diff --git a/tests/cpp/data/test_simple_dmatrix.cc b/tests/cpp/data/test_simple_dmatrix.cc index f403d7657..50b865258 100644 --- a/tests/cpp/data/test_simple_dmatrix.cc +++ b/tests/cpp/data/test_simple_dmatrix.cc @@ -1,13 +1,15 @@ // Copyright by Contributors #include +#include #include "../../../src/data/simple_dmatrix.h" #include "../helpers.h" TEST(SimpleDMatrix, MetaInfo) { - std::string tmp_file = CreateSimpleTestData(); + dmlc::TemporaryDirectory tempdir; + const std::string tmp_file = tempdir.path + "/simple.libsvm"; + CreateSimpleTestData(tmp_file); xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, true, false); - std::remove(tmp_file.c_str()); // Test the metadata that was parsed EXPECT_EQ(dmat->Info().num_row_, 2); @@ -19,9 +21,10 @@ TEST(SimpleDMatrix, MetaInfo) { } TEST(SimpleDMatrix, RowAccess) { - std::string tmp_file = CreateSimpleTestData(); + dmlc::TemporaryDirectory tempdir; + const std::string tmp_file = tempdir.path + "/simple.libsvm"; + CreateSimpleTestData(tmp_file); xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, false, false); - std::remove(tmp_file.c_str()); // Loop over the batches and count the records long row_count = 0; @@ -40,9 +43,10 @@ TEST(SimpleDMatrix, RowAccess) { } TEST(SimpleDMatrix, ColAccessWithoutBatches) { - std::string tmp_file = CreateSimpleTestData(); + dmlc::TemporaryDirectory tempdir; + const std::string tmp_file = tempdir.path + "/simple.libsvm"; + CreateSimpleTestData(tmp_file); xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, true, false); - std::remove(tmp_file.c_str()); // Sorted column access EXPECT_EQ(dmat->GetColDensity(0), 1); diff --git a/tests/cpp/data/test_sparse_page_dmatrix.cc b/tests/cpp/data/test_sparse_page_dmatrix.cc index 1c8533036..c4ca6aa49 100644 --- a/tests/cpp/data/test_sparse_page_dmatrix.cc +++ b/tests/cpp/data/test_sparse_page_dmatrix.cc @@ -1,11 +1,14 @@ // Copyright by Contributors #include +#include #include "../../../src/data/sparse_page_dmatrix.h" #include "../helpers.h" TEST(SparsePageDMatrix, MetaInfo) { - std::string tmp_file = CreateSimpleTestData(); + dmlc::TemporaryDirectory tempdir; + const std::string tmp_file = tempdir.path + "/simple.libsvm"; + CreateSimpleTestData(tmp_file); xgboost::DMatrix * dmat = xgboost::DMatrix::Load( tmp_file + "#" + tmp_file + ".cache", false, false); std::cout << tmp_file << std::endl; @@ -17,20 +20,16 @@ TEST(SparsePageDMatrix, MetaInfo) { EXPECT_EQ(dmat->Info().num_nonzero_, 6); EXPECT_EQ(dmat->Info().labels_.Size(), dmat->Info().num_row_); - // Clean up of external memory files - std::remove(tmp_file.c_str()); - std::remove((tmp_file + ".cache").c_str()); - std::remove((tmp_file + ".cache.row.page").c_str()); - delete dmat; } TEST(SparsePageDMatrix, RowAccess) { // Create sufficiently large data to make two row pages - std::string tmp_file = CreateBigTestData(5000000); + dmlc::TemporaryDirectory tempdir; + const std::string tmp_file = tempdir.path + "/big.libsvm"; + CreateBigTestData(tmp_file, 5000000); xgboost::DMatrix * dmat = xgboost::DMatrix::Load( tmp_file + "#" + tmp_file + ".cache", true, false); - std::remove(tmp_file.c_str()); EXPECT_TRUE(FileExists(tmp_file + ".cache.row.page")); // Loop over the batches and count the records @@ -47,18 +46,15 @@ TEST(SparsePageDMatrix, RowAccess) { EXPECT_EQ(first_row[2].index, 2); EXPECT_EQ(first_row[2].fvalue, 20); - // Clean up of external memory files - std::remove((tmp_file + ".cache").c_str()); - std::remove((tmp_file + ".cache.row.page").c_str()); - delete dmat; } TEST(SparsePageDMatrix, ColAccess) { - std::string tmp_file = CreateSimpleTestData(); + dmlc::TemporaryDirectory tempdir; + const std::string tmp_file = tempdir.path + "/simple.libsvm"; + CreateSimpleTestData(tmp_file); xgboost::DMatrix * dmat = xgboost::DMatrix::Load( tmp_file + "#" + tmp_file + ".cache", true, false); - std::remove(tmp_file.c_str()); EXPECT_EQ(dmat->GetColDensity(0), 1); EXPECT_EQ(dmat->GetColDensity(1), 0.5); @@ -82,10 +78,5 @@ TEST(SparsePageDMatrix, ColAccess) { EXPECT_TRUE(FileExists(tmp_file + ".cache.col.page")); EXPECT_TRUE(FileExists(tmp_file + ".cache.sorted.col.page")); - std::remove((tmp_file + ".cache").c_str()); - std::remove((tmp_file + ".cache.row.page").c_str()); - std::remove((tmp_file + ".cache.col.page").c_str()); - std::remove((tmp_file + ".cache.sorted.col.page").c_str()); - delete dmat; } diff --git a/tests/cpp/helpers.cc b/tests/cpp/helpers.cc index ba146af42..7585dc929 100644 --- a/tests/cpp/helpers.cc +++ b/tests/cpp/helpers.cc @@ -5,16 +5,6 @@ #include "xgboost/c_api.h" #include -std::string TempFileName() { - std::string tmp = std::tmpnam(nullptr); - std::replace(tmp.begin(), tmp.end(), '\\', - '/'); // Remove windows backslashes - // Remove drive prefix for windows - if (tmp.find("C:") != std::string::npos) - tmp.erase(tmp.find("C:"), 2); - return tmp; -} - bool FileExists(const std::string name) { struct stat st; return stat(name.c_str(), &st) == 0; @@ -26,22 +16,18 @@ long GetFileSize(const std::string filename) { return st.st_size; } -std::string CreateSimpleTestData() { - return CreateBigTestData(6); +void CreateSimpleTestData(const std::string& filename) { + CreateBigTestData(filename, 6); } -std::string CreateBigTestData(size_t n_entries) { - std::string tmp_file = TempFileName(); - std::ofstream fo; - fo.open(tmp_file); +void CreateBigTestData(const std::string& filename, size_t n_entries) { + std::ofstream fo(filename.c_str()); const size_t entries_per_row = 3; size_t n_rows = (n_entries + entries_per_row - 1) / entries_per_row; for (size_t i = 0; i < n_rows; ++i) { const char* row = i % 2 == 0 ? " 0:0 1:10 2:20\n" : " 0:0 3:30 4:40\n"; fo << i << row; } - fo.close(); - return tmp_file; } void _CheckObjFunction(xgboost::ObjFunction * obj, diff --git a/tests/cpp/helpers.h b/tests/cpp/helpers.h index 507bd3aa3..4d990884e 100644 --- a/tests/cpp/helpers.h +++ b/tests/cpp/helpers.h @@ -24,15 +24,13 @@ #define DeclareUnifiedTest(name) name #endif -std::string TempFileName(); - bool FileExists(const std::string name); long GetFileSize(const std::string filename); -std::string CreateSimpleTestData(); +void CreateSimpleTestData(const std::string& filename); -std::string CreateBigTestData(size_t n_entries); +void CreateBigTestData(const std::string& filename, size_t n_entries); void CheckObjFunction(xgboost::ObjFunction * obj, std::vector preds,