* Fix #3708: Use dmlc::TemporaryDirectory to handle temporaries in cross-platform way Also install git inside NVIDIA GPU container * Update dmlc-core
This commit is contained in:
parent
55ee9a92a1
commit
abf2f661be
@ -1 +1 @@
|
||||
Subproject commit e3377de933d3e069635150fb330ce6cc33b57950
|
||||
Subproject commit 4d49691f1a9d944c3b0aa5e63f1db3cad1f941f8
|
||||
@ -7,7 +7,7 @@ ENV DEBIAN_FRONTEND noninteractive
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
yum -y update && \
|
||||
yum install -y tar unzip wget xz && \
|
||||
yum install -y tar unzip wget xz git && \
|
||||
wget http://people.centos.org/tru/devtools-2/devtools-2.repo -O /etc/yum.repos.d/devtools-2.repo && \
|
||||
yum install -y devtoolset-2-gcc devtoolset-2-binutils devtoolset-2-gcc-c++ && \
|
||||
# Python
|
||||
|
||||
@ -6,7 +6,7 @@ ENV DEBIAN_FRONTEND noninteractive
|
||||
# Install all basic requirements
|
||||
RUN \
|
||||
yum -y update && \
|
||||
yum install -y graphviz tar unzip wget xz && \
|
||||
yum install -y graphviz tar unzip wget xz git && \
|
||||
# Python
|
||||
wget https://repo.continuum.io/miniconda/Miniconda2-4.3.27-Linux-x86_64.sh && \
|
||||
bash Miniconda2-4.3.27-Linux-x86_64.sh -b -p /opt/python
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
// Copyright by Contributors
|
||||
#include <dmlc/io.h>
|
||||
#include <dmlc/filesystem.h>
|
||||
#include <xgboost/data.h>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
@ -48,8 +49,9 @@ TEST(MetaInfo, SaveLoadBinary) {
|
||||
info.num_row_ = 2;
|
||||
info.num_col_ = 1;
|
||||
|
||||
std::string tmp_file = TempFileName();
|
||||
dmlc::Stream * fs = dmlc::Stream::Create(tmp_file.c_str(), "w");
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/metainfo.binary";
|
||||
dmlc::Stream* fs = dmlc::Stream::Create(tmp_file.c_str(), "w");
|
||||
info.SaveBinary(fs);
|
||||
delete fs;
|
||||
|
||||
@ -62,14 +64,12 @@ TEST(MetaInfo, SaveLoadBinary) {
|
||||
EXPECT_EQ(inforead.labels_.HostVector(), info.labels_.HostVector());
|
||||
EXPECT_EQ(inforead.num_col_, info.num_col_);
|
||||
EXPECT_EQ(inforead.num_row_, info.num_row_);
|
||||
|
||||
std::remove(tmp_file.c_str());
|
||||
|
||||
delete fs;
|
||||
}
|
||||
|
||||
TEST(MetaInfo, LoadQid) {
|
||||
std::string tmp_file = TempFileName();
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
std::string tmp_file = tempdir.path + "/qid_test.libsvm";
|
||||
{
|
||||
std::unique_ptr<dmlc::Stream> fs(
|
||||
dmlc::Stream::Create(tmp_file.c_str(), "w"));
|
||||
@ -90,7 +90,6 @@ TEST(MetaInfo, LoadQid) {
|
||||
}
|
||||
std::unique_ptr<xgboost::DMatrix> dmat(
|
||||
xgboost::DMatrix::Load(tmp_file, true, false, "libsvm"));
|
||||
std::remove(tmp_file.c_str());
|
||||
|
||||
const xgboost::MetaInfo& info = dmat->Info();
|
||||
const std::vector<uint64_t> expected_qids{1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3};
|
||||
|
||||
@ -1,18 +1,19 @@
|
||||
// Copyright by Contributors
|
||||
#include <xgboost/data.h>
|
||||
#include <dmlc/filesystem.h>
|
||||
#include "../../../src/data/simple_csr_source.h"
|
||||
|
||||
#include "../helpers.h"
|
||||
|
||||
TEST(SimpleCSRSource, SaveLoadBinary) {
|
||||
std::string tmp_file = CreateSimpleTestData();
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, true, false);
|
||||
std::remove(tmp_file.c_str());
|
||||
|
||||
std::string tmp_binfile = TempFileName();
|
||||
const std::string tmp_binfile = tempdir.path + "/csr_source.binary";
|
||||
dmat->SaveToLocalFile(tmp_binfile);
|
||||
xgboost::DMatrix * dmat_read = xgboost::DMatrix::Load(tmp_binfile, true, false);
|
||||
std::remove(tmp_binfile.c_str());
|
||||
|
||||
EXPECT_EQ(dmat->Info().num_col_, dmat_read->Info().num_col_);
|
||||
EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);
|
||||
|
||||
@ -1,13 +1,15 @@
|
||||
// Copyright by Contributors
|
||||
#include <xgboost/data.h>
|
||||
#include <dmlc/filesystem.h>
|
||||
#include "../../../src/data/simple_dmatrix.h"
|
||||
|
||||
#include "../helpers.h"
|
||||
|
||||
TEST(SimpleDMatrix, MetaInfo) {
|
||||
std::string tmp_file = CreateSimpleTestData();
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, true, false);
|
||||
std::remove(tmp_file.c_str());
|
||||
|
||||
// Test the metadata that was parsed
|
||||
EXPECT_EQ(dmat->Info().num_row_, 2);
|
||||
@ -19,9 +21,10 @@ TEST(SimpleDMatrix, MetaInfo) {
|
||||
}
|
||||
|
||||
TEST(SimpleDMatrix, RowAccess) {
|
||||
std::string tmp_file = CreateSimpleTestData();
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, false, false);
|
||||
std::remove(tmp_file.c_str());
|
||||
|
||||
// Loop over the batches and count the records
|
||||
long row_count = 0;
|
||||
@ -40,9 +43,10 @@ TEST(SimpleDMatrix, RowAccess) {
|
||||
}
|
||||
|
||||
TEST(SimpleDMatrix, ColAccessWithoutBatches) {
|
||||
std::string tmp_file = CreateSimpleTestData();
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, true, false);
|
||||
std::remove(tmp_file.c_str());
|
||||
|
||||
// Sorted column access
|
||||
EXPECT_EQ(dmat->GetColDensity(0), 1);
|
||||
|
||||
@ -1,11 +1,14 @@
|
||||
// Copyright by Contributors
|
||||
#include <xgboost/data.h>
|
||||
#include <dmlc/filesystem.h>
|
||||
#include "../../../src/data/sparse_page_dmatrix.h"
|
||||
|
||||
#include "../helpers.h"
|
||||
|
||||
TEST(SparsePageDMatrix, MetaInfo) {
|
||||
std::string tmp_file = CreateSimpleTestData();
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(
|
||||
tmp_file + "#" + tmp_file + ".cache", false, false);
|
||||
std::cout << tmp_file << std::endl;
|
||||
@ -17,20 +20,16 @@ TEST(SparsePageDMatrix, MetaInfo) {
|
||||
EXPECT_EQ(dmat->Info().num_nonzero_, 6);
|
||||
EXPECT_EQ(dmat->Info().labels_.Size(), dmat->Info().num_row_);
|
||||
|
||||
// Clean up of external memory files
|
||||
std::remove(tmp_file.c_str());
|
||||
std::remove((tmp_file + ".cache").c_str());
|
||||
std::remove((tmp_file + ".cache.row.page").c_str());
|
||||
|
||||
delete dmat;
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, RowAccess) {
|
||||
// Create sufficiently large data to make two row pages
|
||||
std::string tmp_file = CreateBigTestData(5000000);
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/big.libsvm";
|
||||
CreateBigTestData(tmp_file, 5000000);
|
||||
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(
|
||||
tmp_file + "#" + tmp_file + ".cache", true, false);
|
||||
std::remove(tmp_file.c_str());
|
||||
EXPECT_TRUE(FileExists(tmp_file + ".cache.row.page"));
|
||||
|
||||
// Loop over the batches and count the records
|
||||
@ -47,18 +46,15 @@ TEST(SparsePageDMatrix, RowAccess) {
|
||||
EXPECT_EQ(first_row[2].index, 2);
|
||||
EXPECT_EQ(first_row[2].fvalue, 20);
|
||||
|
||||
// Clean up of external memory files
|
||||
std::remove((tmp_file + ".cache").c_str());
|
||||
std::remove((tmp_file + ".cache.row.page").c_str());
|
||||
|
||||
delete dmat;
|
||||
}
|
||||
|
||||
TEST(SparsePageDMatrix, ColAccess) {
|
||||
std::string tmp_file = CreateSimpleTestData();
|
||||
dmlc::TemporaryDirectory tempdir;
|
||||
const std::string tmp_file = tempdir.path + "/simple.libsvm";
|
||||
CreateSimpleTestData(tmp_file);
|
||||
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(
|
||||
tmp_file + "#" + tmp_file + ".cache", true, false);
|
||||
std::remove(tmp_file.c_str());
|
||||
|
||||
EXPECT_EQ(dmat->GetColDensity(0), 1);
|
||||
EXPECT_EQ(dmat->GetColDensity(1), 0.5);
|
||||
@ -82,10 +78,5 @@ TEST(SparsePageDMatrix, ColAccess) {
|
||||
EXPECT_TRUE(FileExists(tmp_file + ".cache.col.page"));
|
||||
EXPECT_TRUE(FileExists(tmp_file + ".cache.sorted.col.page"));
|
||||
|
||||
std::remove((tmp_file + ".cache").c_str());
|
||||
std::remove((tmp_file + ".cache.row.page").c_str());
|
||||
std::remove((tmp_file + ".cache.col.page").c_str());
|
||||
std::remove((tmp_file + ".cache.sorted.col.page").c_str());
|
||||
|
||||
delete dmat;
|
||||
}
|
||||
|
||||
@ -5,16 +5,6 @@
|
||||
#include "xgboost/c_api.h"
|
||||
#include <random>
|
||||
|
||||
std::string TempFileName() {
|
||||
std::string tmp = std::tmpnam(nullptr);
|
||||
std::replace(tmp.begin(), tmp.end(), '\\',
|
||||
'/'); // Remove windows backslashes
|
||||
// Remove drive prefix for windows
|
||||
if (tmp.find("C:") != std::string::npos)
|
||||
tmp.erase(tmp.find("C:"), 2);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
bool FileExists(const std::string name) {
|
||||
struct stat st;
|
||||
return stat(name.c_str(), &st) == 0;
|
||||
@ -26,22 +16,18 @@ long GetFileSize(const std::string filename) {
|
||||
return st.st_size;
|
||||
}
|
||||
|
||||
std::string CreateSimpleTestData() {
|
||||
return CreateBigTestData(6);
|
||||
void CreateSimpleTestData(const std::string& filename) {
|
||||
CreateBigTestData(filename, 6);
|
||||
}
|
||||
|
||||
std::string CreateBigTestData(size_t n_entries) {
|
||||
std::string tmp_file = TempFileName();
|
||||
std::ofstream fo;
|
||||
fo.open(tmp_file);
|
||||
void CreateBigTestData(const std::string& filename, size_t n_entries) {
|
||||
std::ofstream fo(filename.c_str());
|
||||
const size_t entries_per_row = 3;
|
||||
size_t n_rows = (n_entries + entries_per_row - 1) / entries_per_row;
|
||||
for (size_t i = 0; i < n_rows; ++i) {
|
||||
const char* row = i % 2 == 0 ? " 0:0 1:10 2:20\n" : " 0:0 3:30 4:40\n";
|
||||
fo << i << row;
|
||||
}
|
||||
fo.close();
|
||||
return tmp_file;
|
||||
}
|
||||
|
||||
void _CheckObjFunction(xgboost::ObjFunction * obj,
|
||||
|
||||
@ -24,15 +24,13 @@
|
||||
#define DeclareUnifiedTest(name) name
|
||||
#endif
|
||||
|
||||
std::string TempFileName();
|
||||
|
||||
bool FileExists(const std::string name);
|
||||
|
||||
long GetFileSize(const std::string filename);
|
||||
|
||||
std::string CreateSimpleTestData();
|
||||
void CreateSimpleTestData(const std::string& filename);
|
||||
|
||||
std::string CreateBigTestData(size_t n_entries);
|
||||
void CreateBigTestData(const std::string& filename, size_t n_entries);
|
||||
|
||||
void CheckObjFunction(xgboost::ObjFunction * obj,
|
||||
std::vector<xgboost::bst_float> preds,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user