Simplify INI-style config reader using C++11 STL (#4478)
* simplify the config.h file * revise config.h * revised config.h * revise format * revise format issues * revise whitespace issues * revise whitespace namespace format issues * revise namespace format issues * format issues * format issues * format issues * format issues * Revert submodule changes * minor change * Update src/common/config.h Co-Authored-By: Philip Hyunsu Cho <chohyu01@cs.washington.edu> * address format issue from trivialfis * Use correct cub submodule
This commit is contained in:
parent
b48f895027
commit
dd60fc23e6
@ -91,3 +91,4 @@ List of Contributors
|
||||
* [Jiaxiang Li](https://github.com/JiaxiangBU)
|
||||
* [Bryan Woods](https://github.com/bryan-woods)
|
||||
- Bryan added support for cross-validation for the ranking objective
|
||||
* [Haoda Fu](https://github.com/fuhaoda)
|
||||
@ -341,13 +341,10 @@ int CLIRunTask(int argc, char *argv[]) {
|
||||
}
|
||||
rabit::Init(argc, argv);
|
||||
|
||||
std::vector<std::pair<std::string, std::string> > cfg;
|
||||
common::ConfigParse cp(argv[1]);
|
||||
auto cfg = cp.Parse();
|
||||
cfg.emplace_back("seed", "0");
|
||||
|
||||
common::ConfigIterator itr(argv[1]);
|
||||
while (itr.Next()) {
|
||||
cfg.emplace_back(std::string(itr.Name()), std::string(itr.Val()));
|
||||
}
|
||||
|
||||
for (int i = 2; i < argc; ++i) {
|
||||
char name[256], val[256];
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
/*!
|
||||
* Copyright 2014 by Contributors
|
||||
* Copyright 2014-2019 by Contributors
|
||||
* \file config.h
|
||||
* \brief helper class to load in configures from file
|
||||
* \author Tianqi Chen
|
||||
* \author Haoda Fu
|
||||
*/
|
||||
#ifndef XGBOOST_COMMON_CONFIG_H_
|
||||
#define XGBOOST_COMMON_CONFIG_H_
|
||||
@ -12,181 +12,73 @@
|
||||
#include <string>
|
||||
#include <istream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
/*!
|
||||
* \brief base implementation of config reader
|
||||
* \brief Implementation of config reader
|
||||
*/
|
||||
class ConfigReaderBase {
|
||||
public:
|
||||
/*!
|
||||
* \brief get current name, called after Next returns true
|
||||
* \return current parameter name
|
||||
*/
|
||||
inline const char *Name() const {
|
||||
return s_name_.c_str();
|
||||
}
|
||||
/*!
|
||||
* \brief get current value, called after Next returns true
|
||||
* \return current parameter value
|
||||
*/
|
||||
inline const char *Val() const {
|
||||
return s_val_.c_str();
|
||||
}
|
||||
/*!
|
||||
* \brief move iterator to next position
|
||||
* \return true if there is value in next position
|
||||
*/
|
||||
inline bool Next() {
|
||||
while (!this->IsEnd()) {
|
||||
GetNextToken(&s_name_);
|
||||
if (s_name_ == "=") return false;
|
||||
if (GetNextToken(&s_buf_) || s_buf_ != "=") return false;
|
||||
if (GetNextToken(&s_val_) || s_val_ == "=") return false;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// called before usage
|
||||
inline void Init() {
|
||||
ch_buf_ = this->GetChar();
|
||||
}
|
||||
|
||||
protected:
|
||||
/*!
|
||||
* \brief to be implemented by subclass,
|
||||
* get next token, return EOF if end of file
|
||||
*/
|
||||
virtual int GetChar() = 0;
|
||||
/*! \brief to be implemented by child, check if end of stream */
|
||||
virtual bool IsEnd() = 0;
|
||||
|
||||
private:
|
||||
int ch_buf_;
|
||||
std::string s_name_, s_val_, s_buf_;
|
||||
|
||||
inline void SkipLine() {
|
||||
do {
|
||||
ch_buf_ = this->GetChar();
|
||||
} while (ch_buf_ != EOF && ch_buf_ != '\n' && ch_buf_ != '\r');
|
||||
}
|
||||
|
||||
inline void ParseStr(std::string *tok) {
|
||||
while ((ch_buf_ = this->GetChar()) != EOF) {
|
||||
switch (ch_buf_) {
|
||||
case '\\': *tok += this->GetChar(); break;
|
||||
case '\"': return;
|
||||
case '\r':
|
||||
case '\n': LOG(FATAL)<< "ConfigReader: unterminated string";
|
||||
default: *tok += static_cast<char>(ch_buf_);
|
||||
}
|
||||
}
|
||||
LOG(FATAL) << "ConfigReader: unterminated string";
|
||||
}
|
||||
inline void ParseStrML(std::string *tok) {
|
||||
while ((ch_buf_ = this->GetChar()) != EOF) {
|
||||
switch (ch_buf_) {
|
||||
case '\\': *tok += this->GetChar(); break;
|
||||
case '\'': return;
|
||||
default: *tok += static_cast<char>(ch_buf_);
|
||||
}
|
||||
}
|
||||
LOG(FATAL) << "unterminated string";
|
||||
}
|
||||
// return newline
|
||||
inline bool GetNextToken(std::string *tok) {
|
||||
tok->clear();
|
||||
bool new_line = false;
|
||||
while (ch_buf_ != EOF) {
|
||||
switch (ch_buf_) {
|
||||
case '#' : SkipLine(); new_line = true; break;
|
||||
case '\"':
|
||||
if (tok->length() == 0) {
|
||||
ParseStr(tok); ch_buf_ = this->GetChar(); return new_line;
|
||||
} else {
|
||||
LOG(FATAL) << "ConfigReader: token followed directly by string";
|
||||
}
|
||||
case '\'':
|
||||
if (tok->length() == 0) {
|
||||
ParseStrML(tok); ch_buf_ = this->GetChar(); return new_line;
|
||||
} else {
|
||||
LOG(FATAL) << "ConfigReader: token followed directly by string";
|
||||
}
|
||||
case '=':
|
||||
if (tok->length() == 0) {
|
||||
ch_buf_ = this->GetChar();
|
||||
*tok = '=';
|
||||
}
|
||||
return new_line;
|
||||
case '\r':
|
||||
case '\n':
|
||||
if (tok->length() == 0) new_line = true;
|
||||
case '\t':
|
||||
case ' ' :
|
||||
ch_buf_ = this->GetChar();
|
||||
if (tok->length() != 0) return new_line;
|
||||
break;
|
||||
default:
|
||||
*tok += static_cast<char>(ch_buf_);
|
||||
ch_buf_ = this->GetChar();
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (tok->length() == 0) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
/*!
|
||||
* \brief an iterator use stream base, allows use all types of istream
|
||||
*/
|
||||
class ConfigStreamReader: public ConfigReaderBase {
|
||||
class ConfigParse {
|
||||
public:
|
||||
/*!
|
||||
* \brief constructor
|
||||
* \param fin istream input stream
|
||||
* \param cfgFileName name of configure file
|
||||
*/
|
||||
explicit ConfigStreamReader(std::istream &fin) : fin_(fin) {}
|
||||
|
||||
protected:
|
||||
int GetChar() override {
|
||||
return fin_.get();
|
||||
}
|
||||
/*! \brief to be implemented by child, check if end of stream */
|
||||
bool IsEnd() override {
|
||||
return fin_.eof();
|
||||
}
|
||||
|
||||
private:
|
||||
std::istream &fin_;
|
||||
};
|
||||
|
||||
/*!
|
||||
* \brief an iterator that iterates over a configure file and gets the configures
|
||||
*/
|
||||
class ConfigIterator: public ConfigStreamReader {
|
||||
public:
|
||||
/*!
|
||||
* \brief constructor
|
||||
* \param fname name of configure file
|
||||
*/
|
||||
explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi_) {
|
||||
fi_.open(fname);
|
||||
explicit ConfigParse(const std::string &cfgFileName) {
|
||||
fi_.open(cfgFileName);
|
||||
if (fi_.fail()) {
|
||||
LOG(FATAL) << "cannot open file " << fname;
|
||||
LOG(FATAL) << "cannot open file " << cfgFileName;
|
||||
}
|
||||
ConfigReaderBase::Init();
|
||||
}
|
||||
/*! \brief destructor */
|
||||
~ConfigIterator() {
|
||||
|
||||
/*!
|
||||
* \brief parse the configure file
|
||||
*/
|
||||
std::vector<std::pair<std::string, std::string> > Parse() {
|
||||
std::vector<std::pair<std::string, std::string> > results{};
|
||||
char delimiter = '=';
|
||||
char comment = '#';
|
||||
std::string line{};
|
||||
std::string name{};
|
||||
std::string value{};
|
||||
|
||||
while (!fi_.eof()) {
|
||||
std::getline(fi_, line); // read a line of configure file
|
||||
line = line.substr(0, line.find(comment)); // anything beyond # is comment
|
||||
size_t delimiterPos = line.find(delimiter); // find the = sign
|
||||
name = line.substr(0, delimiterPos); // anything before = is the name
|
||||
// after this = is the value
|
||||
value = line.substr(delimiterPos + 1, line.length() - delimiterPos - 1);
|
||||
|
||||
if (line.empty() || name.empty() || value.empty())
|
||||
continue; // skip a line if # at beginning or there is no value or no name.
|
||||
CleanString(&name); // clean the string
|
||||
CleanString(&value);
|
||||
results.emplace_back(name, value);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/*! \brief destructor, closes the configure file stream */
~ConfigParse() { fi_.close(); }
|
||||
|
||||
private:
|
||||
std::ifstream fi_;
|
||||
std::string allowableChar_ =
|
||||
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-./\\";
|
||||
|
||||
/*!
|
||||
* \brief remove unnecessary chars.
|
||||
*/
|
||||
void CleanString(std::string * str) {
|
||||
size_t firstIndx = str->find_first_of(allowableChar_);
|
||||
size_t lastIndx = str->find_last_of(allowableChar_);
|
||||
// this line can be more efficient, but keep as is for simplicity.
|
||||
*str = str->substr(firstIndx, lastIndx - firstIndx + 1);
|
||||
}
|
||||
};
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user