Simplify INI-style config reader using C++11 STL (#4478)
* simplify the config.h file
* revise config.h
* revised config.h
* revise format
* revise format issues
* revise whitespace issues
* revise whitespace namespace format issues
* revise namespace format issues
* format issues
* format issues
* format issues
* format issues
* Revert submodule changes
* minor change
* Update src/common/config.h Co-Authored-By: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
* address format issue from trivialfis
* Use correct cub submodule
This commit is contained in:
parent
b48f895027
commit
dd60fc23e6
@ -91,3 +91,4 @@ List of Contributors
|
|||||||
* [Jiaxiang Li](https://github.com/JiaxiangBU)
|
* [Jiaxiang Li](https://github.com/JiaxiangBU)
|
||||||
* [Bryan Woods](https://github.com/bryan-woods)
|
* [Bryan Woods](https://github.com/bryan-woods)
|
||||||
- Bryan added support for cross-validation for the ranking objective
|
- Bryan added support for cross-validation for the ranking objective
|
||||||
|
* [Haoda Fu](https://github.com/fuhaoda)
|
||||||
@ -341,13 +341,10 @@ int CLIRunTask(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
rabit::Init(argc, argv);
|
rabit::Init(argc, argv);
|
||||||
|
|
||||||
std::vector<std::pair<std::string, std::string> > cfg;
|
common::ConfigParse cp(argv[1]);
|
||||||
|
auto cfg = cp.Parse();
|
||||||
cfg.emplace_back("seed", "0");
|
cfg.emplace_back("seed", "0");
|
||||||
|
|
||||||
common::ConfigIterator itr(argv[1]);
|
|
||||||
while (itr.Next()) {
|
|
||||||
cfg.emplace_back(std::string(itr.Name()), std::string(itr.Val()));
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 2; i < argc; ++i) {
|
for (int i = 2; i < argc; ++i) {
|
||||||
char name[256], val[256];
|
char name[256], val[256];
|
||||||
|
|||||||
@ -1,8 +1,8 @@
|
|||||||
/*!
|
/*!
|
||||||
* Copyright 2014 by Contributors
|
* Copyright 2014-2019 by Contributors
|
||||||
* \file config.h
|
* \file config.h
|
||||||
* \brief helper class to load in configures from file
|
* \brief helper class to load in configures from file
|
||||||
* \author Tianqi Chen
|
* \author Haoda Fu
|
||||||
*/
|
*/
|
||||||
#ifndef XGBOOST_COMMON_CONFIG_H_
|
#ifndef XGBOOST_COMMON_CONFIG_H_
|
||||||
#define XGBOOST_COMMON_CONFIG_H_
|
#define XGBOOST_COMMON_CONFIG_H_
|
||||||
@ -12,181 +12,73 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <istream>
|
#include <istream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace common {
|
namespace common {
|
||||||
/*!
|
/*!
|
||||||
* \brief base implementation of config reader
|
* \brief Implementation of config reader
|
||||||
*/
|
*/
|
||||||
class ConfigReaderBase {
|
class ConfigParse {
|
||||||
public:
|
|
||||||
/*!
|
|
||||||
* \brief get current name, called after Next returns true
|
|
||||||
* \return current parameter name
|
|
||||||
*/
|
|
||||||
inline const char *Name() const {
|
|
||||||
return s_name_.c_str();
|
|
||||||
}
|
|
||||||
/*!
|
|
||||||
* \brief get current value, called after Next returns true
|
|
||||||
* \return current parameter value
|
|
||||||
*/
|
|
||||||
inline const char *Val() const {
|
|
||||||
return s_val_.c_str();
|
|
||||||
}
|
|
||||||
/*!
|
|
||||||
* \brief move iterator to next position
|
|
||||||
* \return true if there is value in next position
|
|
||||||
*/
|
|
||||||
inline bool Next() {
|
|
||||||
while (!this->IsEnd()) {
|
|
||||||
GetNextToken(&s_name_);
|
|
||||||
if (s_name_ == "=") return false;
|
|
||||||
if (GetNextToken(&s_buf_) || s_buf_ != "=") return false;
|
|
||||||
if (GetNextToken(&s_val_) || s_val_ == "=") return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// called before usage
|
|
||||||
inline void Init() {
|
|
||||||
ch_buf_ = this->GetChar();
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
/*!
|
|
||||||
* \brief to be implemented by subclass,
|
|
||||||
* get next token, return EOF if end of file
|
|
||||||
*/
|
|
||||||
virtual int GetChar() = 0;
|
|
||||||
/*! \brief to be implemented by child, check if end of stream */
|
|
||||||
virtual bool IsEnd() = 0;
|
|
||||||
|
|
||||||
private:
|
|
||||||
int ch_buf_;
|
|
||||||
std::string s_name_, s_val_, s_buf_;
|
|
||||||
|
|
||||||
inline void SkipLine() {
|
|
||||||
do {
|
|
||||||
ch_buf_ = this->GetChar();
|
|
||||||
} while (ch_buf_ != EOF && ch_buf_ != '\n' && ch_buf_ != '\r');
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void ParseStr(std::string *tok) {
|
|
||||||
while ((ch_buf_ = this->GetChar()) != EOF) {
|
|
||||||
switch (ch_buf_) {
|
|
||||||
case '\\': *tok += this->GetChar(); break;
|
|
||||||
case '\"': return;
|
|
||||||
case '\r':
|
|
||||||
case '\n': LOG(FATAL)<< "ConfigReader: unterminated string";
|
|
||||||
default: *tok += static_cast<char>(ch_buf_);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
LOG(FATAL) << "ConfigReader: unterminated string";
|
|
||||||
}
|
|
||||||
inline void ParseStrML(std::string *tok) {
|
|
||||||
while ((ch_buf_ = this->GetChar()) != EOF) {
|
|
||||||
switch (ch_buf_) {
|
|
||||||
case '\\': *tok += this->GetChar(); break;
|
|
||||||
case '\'': return;
|
|
||||||
default: *tok += static_cast<char>(ch_buf_);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
LOG(FATAL) << "unterminated string";
|
|
||||||
}
|
|
||||||
// return newline
|
|
||||||
inline bool GetNextToken(std::string *tok) {
|
|
||||||
tok->clear();
|
|
||||||
bool new_line = false;
|
|
||||||
while (ch_buf_ != EOF) {
|
|
||||||
switch (ch_buf_) {
|
|
||||||
case '#' : SkipLine(); new_line = true; break;
|
|
||||||
case '\"':
|
|
||||||
if (tok->length() == 0) {
|
|
||||||
ParseStr(tok); ch_buf_ = this->GetChar(); return new_line;
|
|
||||||
} else {
|
|
||||||
LOG(FATAL) << "ConfigReader: token followed directly by string";
|
|
||||||
}
|
|
||||||
case '\'':
|
|
||||||
if (tok->length() == 0) {
|
|
||||||
ParseStrML(tok); ch_buf_ = this->GetChar(); return new_line;
|
|
||||||
} else {
|
|
||||||
LOG(FATAL) << "ConfigReader: token followed directly by string";
|
|
||||||
}
|
|
||||||
case '=':
|
|
||||||
if (tok->length() == 0) {
|
|
||||||
ch_buf_ = this->GetChar();
|
|
||||||
*tok = '=';
|
|
||||||
}
|
|
||||||
return new_line;
|
|
||||||
case '\r':
|
|
||||||
case '\n':
|
|
||||||
if (tok->length() == 0) new_line = true;
|
|
||||||
case '\t':
|
|
||||||
case ' ' :
|
|
||||||
ch_buf_ = this->GetChar();
|
|
||||||
if (tok->length() != 0) return new_line;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
*tok += static_cast<char>(ch_buf_);
|
|
||||||
ch_buf_ = this->GetChar();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (tok->length() == 0) {
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
/*!
|
|
||||||
* \brief an iterator use stream base, allows use all types of istream
|
|
||||||
*/
|
|
||||||
class ConfigStreamReader: public ConfigReaderBase {
|
|
||||||
public:
|
public:
|
||||||
/*!
|
/*!
|
||||||
* \brief constructor
|
* \brief constructor
|
||||||
* \param fin istream input stream
|
* \param cfgFileName name of configure file
|
||||||
*/
|
*/
|
||||||
explicit ConfigStreamReader(std::istream &fin) : fin_(fin) {}
|
explicit ConfigParse(const std::string &cfgFileName) {
|
||||||
|
fi_.open(cfgFileName);
|
||||||
protected:
|
|
||||||
int GetChar() override {
|
|
||||||
return fin_.get();
|
|
||||||
}
|
|
||||||
/*! \brief to be implemented by child, check if end of stream */
|
|
||||||
bool IsEnd() override {
|
|
||||||
return fin_.eof();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::istream &fin_;
|
|
||||||
};
|
|
||||||
|
|
||||||
/*!
|
|
||||||
* \brief an iterator that iterates over a configure file and gets the configures
|
|
||||||
*/
|
|
||||||
class ConfigIterator: public ConfigStreamReader {
|
|
||||||
public:
|
|
||||||
/*!
|
|
||||||
* \brief constructor
|
|
||||||
* \param fname name of configure file
|
|
||||||
*/
|
|
||||||
explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi_) {
|
|
||||||
fi_.open(fname);
|
|
||||||
if (fi_.fail()) {
|
if (fi_.fail()) {
|
||||||
LOG(FATAL) << "cannot open file " << fname;
|
LOG(FATAL) << "cannot open file " << cfgFileName;
|
||||||
}
|
}
|
||||||
ConfigReaderBase::Init();
|
|
||||||
}
|
}
|
||||||
/*! \brief destructor */
|
|
||||||
~ConfigIterator() {
|
/*!
|
||||||
|
* \brief parse the configure file
|
||||||
|
*/
|
||||||
|
std::vector<std::pair<std::string, std::string> > Parse() {
|
||||||
|
std::vector<std::pair<std::string, std::string> > results{};
|
||||||
|
char delimiter = '=';
|
||||||
|
char comment = '#';
|
||||||
|
std::string line{};
|
||||||
|
std::string name{};
|
||||||
|
std::string value{};
|
||||||
|
|
||||||
|
while (!fi_.eof()) {
|
||||||
|
std::getline(fi_, line); // read a line of configure file
|
||||||
|
line = line.substr(0, line.find(comment)); // anything beyond # is comment
|
||||||
|
size_t delimiterPos = line.find(delimiter); // find the = sign
|
||||||
|
name = line.substr(0, delimiterPos); // anything before = is the name
|
||||||
|
// after this = is the value
|
||||||
|
value = line.substr(delimiterPos + 1, line.length() - delimiterPos - 1);
|
||||||
|
|
||||||
|
if (line.empty() || name.empty() || value.empty())
|
||||||
|
continue; // skip a line if # at beginning or there is no value or no name.
|
||||||
|
CleanString(&name); // clean the string
|
||||||
|
CleanString(&value);
|
||||||
|
results.emplace_back(name, value);
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
~ConfigParse() {
|
||||||
fi_.close();
|
fi_.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::ifstream fi_;
|
std::ifstream fi_;
|
||||||
|
std::string allowableChar_ =
|
||||||
|
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-./\\";
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief remove unnecessary chars.
|
||||||
|
*/
|
||||||
|
void CleanString(std::string * str) {
|
||||||
|
size_t firstIndx = str->find_first_of(allowableChar_);
|
||||||
|
size_t lastIndx = str->find_last_of(allowableChar_);
|
||||||
|
// this line can be more efficient, but keep as is for simplicity.
|
||||||
|
*str = str->substr(firstIndx, lastIndx - firstIndx + 1);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
} // namespace common
|
} // namespace common
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user