Ensure that LoadSequentialFile() actually read the whole file (#5831)

This commit is contained in:
Philip Hyunsu Cho 2020-07-04 01:17:11 -07:00 committed by GitHub
parent 1a0801238e
commit efe3e48ae2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 17 additions and 35 deletions

View File

@ -9,12 +9,12 @@
#include <vector>
#include <memory>
#include <string>
#include <cinttypes>
#include <utility>
#include <map>
#include <limits>
#include <sstream>
#include <locale>
#include <cinttypes>
namespace xgboost {
/*
@ -86,6 +86,8 @@ class JsonReader {
msg += "\", got: \"";
if (got == -1) {
msg += "EOF\"";
} else if (got == 0) {
msg += "\\0\"";
} else {
msg += std::to_string(got) + " \"";
}

View File

@ -7,9 +7,10 @@
#include <unistd.h>
#endif // defined(__unix__)
#include <algorithm>
#include <cstdio>
#include <fstream>
#include <string>
#include <utility>
#include <cstdio>
#include "xgboost/logging.h"
#include "io.h"
@ -108,39 +109,17 @@ std::string LoadSequentialFile(std::string fname) {
};
std::string buffer;
#if defined(__unix__)
struct stat fs;
if (stat(fname.c_str(), &fs) != 0) {
OpenErr();
}
size_t f_size_bytes = fs.st_size;
buffer.resize(f_size_bytes + 1);
int32_t fd = open(fname.c_str(), O_RDONLY);
#if defined(__linux__)
posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
#endif // defined(__linux__)
ssize_t bytes_read = read(fd, &buffer[0], f_size_bytes);
if (bytes_read < 0) {
close(fd);
ReadErr();
}
close(fd);
#else // defined(__unix__)
FILE *f = fopen(fname.c_str(), "r");
if (f == NULL) {
std::string msg;
OpenErr();
}
fseek(f, 0, SEEK_END);
auto fsize = ftell(f);
fseek(f, 0, SEEK_SET);
buffer.resize(fsize + 1);
fread(&buffer[0], 1, fsize, f);
fclose(f);
#endif // defined(__unix__)
// Open in binary mode so that correct file size can be computed with seekg().
// This accommodates Windows platform:
// https://docs.microsoft.com/en-us/cpp/standard-library/basic-istream-class?view=vs-2019#seekg
std::ifstream ifs(fname, std::ios_base::binary | std::ios_base::in);
ifs.seekg(0, std::ios_base::end);
const size_t file_size = static_cast<size_t>(ifs.tellg());
ifs.seekg(0, std::ios_base::beg);
buffer.resize(file_size + 1);
ifs.read(&buffer[0], file_size);
buffer.back() = '\0';
return buffer;
}

View File

@ -75,7 +75,6 @@ class FixedSizeStream : public PeekableInStream {
std::string buffer_;
};
// Optimized for consecutive file loading in unix like systime.
std::string LoadSequentialFile(std::string fname);
inline std::string FileExtension(std::string const& fname) {

View File

@ -427,6 +427,8 @@ void JsonReader::Error(std::string msg) const {
for (auto c : raw_portion) {
if (c == '\n') {
portion += "\\n";
} else if (c == '\0') {
portion += "\\0";
} else {
portion += c;
}