diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index b8cb9607c..e185889eb 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -91,3 +91,4 @@ List of Contributors
 * [Jiaxiang Li](https://github.com/JiaxiangBU)
 * [Bryan Woods](https://github.com/bryan-woods)
   - Bryan added support for cross-validation for the ranking objective
+* [Haoda Fu](https://github.com/fuhaoda)
diff --git a/src/cli_main.cc b/src/cli_main.cc
index ccd675715..ef82ff86f 100644
--- a/src/cli_main.cc
+++ b/src/cli_main.cc
@@ -341,13 +341,14 @@ int CLIRunTask(int argc, char *argv[]) {
   }
   rabit::Init(argc, argv);
 
   std::vector<std::pair<std::string, std::string> > cfg;
   cfg.emplace_back("seed", "0");
 
-  common::ConfigIterator itr(argv[1]);
-  while (itr.Next()) {
-    cfg.emplace_back(std::string(itr.Name()), std::string(itr.Val()));
-  }
+  // Keep the default seed ahead of the entries read from the configure file,
+  // in the same order as before ConfigIterator was replaced.
+  common::ConfigParse cp(argv[1]);
+  const auto file_cfg = cp.Parse();
+  cfg.insert(cfg.end(), file_cfg.begin(), file_cfg.end());
 
   for (int i = 2; i < argc; ++i) {
     char name[256], val[256];
diff --git a/src/common/config.h b/src/common/config.h
index fa99a0733..dfbccd1c5 100644
--- a/src/common/config.h
+++ b/src/common/config.h
@@ -1,8 +1,8 @@
 /*!
- * Copyright 2014 by Contributors
+ * Copyright 2014-2019 by Contributors
  * \file config.h
  * \brief helper class to load in configures from file
- * \author Tianqi Chen
+ * \author Haoda Fu
  */
 #ifndef XGBOOST_COMMON_CONFIG_H_
 #define XGBOOST_COMMON_CONFIG_H_
@@ -12,181 +12,90 @@
 #include <string>
 #include <istream>
 #include <fstream>
+#include <vector>
+#include <utility>
 
 namespace xgboost {
 namespace common {
 /*!
- * \brief base implementation of config reader
+ * \brief Implementation of config reader
  */
-class ConfigReaderBase {
- public:
-  /*!
-   * \brief get current name, called after Next returns true
-   * \return current parameter name
-   */
-  inline const char *Name() const {
-    return s_name_.c_str();
-  }
-  /*!
-   * \brief get current value, called after Next returns true
-   * \return current parameter value
-   */
-  inline const char *Val() const {
-    return s_val_.c_str();
-  }
-  /*!
-   * \brief move iterator to next position
-   * \return true if there is value in next position
-   */
-  inline bool Next() {
-    while (!this->IsEnd()) {
-      GetNextToken(&s_name_);
-      if (s_name_ == "=") return false;
-      if (GetNextToken(&s_buf_) || s_buf_ != "=") return false;
-      if (GetNextToken(&s_val_) || s_val_ == "=") return false;
-      return true;
-    }
-    return false;
-  }
-  // called before usage
-  inline void Init() {
-    ch_buf_ = this->GetChar();
-  }
-
- protected:
-  /*!
-   * \brief to be implemented by subclass,
-   * get next token, return EOF if end of file
-   */
-  virtual int GetChar() = 0;
-  /*! \brief to be implemented by child, check if end of stream */
-  virtual bool IsEnd() = 0;
-
- private:
-  int ch_buf_;
-  std::string s_name_, s_val_, s_buf_;
-
-  inline void SkipLine() {
-    do {
-      ch_buf_ = this->GetChar();
-    } while (ch_buf_ != EOF && ch_buf_ != '\n' && ch_buf_ != '\r');
-  }
-
-  inline void ParseStr(std::string *tok) {
-    while ((ch_buf_ = this->GetChar()) != EOF) {
-      switch (ch_buf_) {
-        case '\\': *tok += this->GetChar(); break;
-        case '\"': return;
-        case '\r':
-        case '\n': LOG(FATAL)<< "ConfigReader: unterminated string";
-        default: *tok += static_cast<char>(ch_buf_);
-      }
-    }
-    LOG(FATAL) << "ConfigReader: unterminated string";
-  }
-  inline void ParseStrML(std::string *tok) {
-    while ((ch_buf_ = this->GetChar()) != EOF) {
-      switch (ch_buf_) {
-        case '\\': *tok += this->GetChar(); break;
-        case '\'': return;
-        default: *tok += static_cast<char>(ch_buf_);
-      }
-    }
-    LOG(FATAL) << "unterminated string";
-  }
-  // return newline
-  inline bool GetNextToken(std::string *tok) {
-    tok->clear();
-    bool new_line = false;
-    while (ch_buf_ != EOF) {
-      switch (ch_buf_) {
-        case '#' : SkipLine(); new_line = true; break;
-        case '\"':
-          if (tok->length() == 0) {
-            ParseStr(tok); ch_buf_ = this->GetChar(); return new_line;
-          } else {
-            LOG(FATAL) << "ConfigReader: token followed directly by string";
-          }
-        case '\'':
-          if (tok->length() == 0) {
-            ParseStrML(tok); ch_buf_ = this->GetChar(); return new_line;
-          } else {
-            LOG(FATAL) << "ConfigReader: token followed directly by string";
-          }
-        case '=':
-          if (tok->length() == 0) {
-            ch_buf_ = this->GetChar();
-            *tok = '=';
-          }
-          return new_line;
-        case '\r':
-        case '\n':
-          if (tok->length() == 0) new_line = true;
-        case '\t':
-        case ' ' :
-          ch_buf_ = this->GetChar();
-          if (tok->length() != 0) return new_line;
-          break;
-        default:
-          *tok += static_cast<char>(ch_buf_);
-          ch_buf_ = this->GetChar();
-          break;
-      }
-    }
-    if (tok->length() == 0) {
-      return true;
-    } else {
-      return false;
-    }
-  }
-};
-/*!
- * \brief an iterator use stream base, allows use all types of istream
- */
-class ConfigStreamReader: public ConfigReaderBase {
+class ConfigParse {
  public:
   /*!
    * \brief constructor
-   * \param fin istream input stream
+   * \param cfgFileName name of configure file
    */
-  explicit ConfigStreamReader(std::istream &fin) : fin_(fin) {}
-
- protected:
-  int GetChar() override {
-    return fin_.get();
-  }
-  /*! \brief to be implemented by child, check if end of stream */
-  bool IsEnd() override {
-    return fin_.eof();
-  }
-
- private:
-  std::istream &fin_;
-};
-
-/*!
- * \brief an iterator that iterates over a configure file and gets the configures
- */
-class ConfigIterator: public ConfigStreamReader {
- public:
-  /*!
-   * \brief constructor
-   * \param fname name of configure file
-   */
-  explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi_) {
-    fi_.open(fname);
+  explicit ConfigParse(const std::string &cfgFileName) {
+    fi_.open(cfgFileName);
     if (fi_.fail()) {
-      LOG(FATAL) << "cannot open file " << fname;
+      LOG(FATAL) << "cannot open file " << cfgFileName;
     }
-    ConfigReaderBase::Init();
   }
-  /*! \brief destructor */
-  ~ConfigIterator() {
+
+  /*!
+   * \brief parse the configure file
+   *
+   * Each line is truncated at the first '#', then split at the first '='.
+   * Lines with no '=', or whose name or value is empty after cleaning, are
+   * skipped.
+   * \return the (name, value) pairs in the order they appear in the file
+   */
+  std::vector<std::pair<std::string, std::string> > Parse() {
+    std::vector<std::pair<std::string, std::string> > results{};
+    const char delimiter = '=';
+    const char comment = '#';
+    std::string line{};
+
+    // Loop on getline itself: testing eof() before reading never terminates
+    // if the stream fails without reaching end of file.
+    while (std::getline(fi_, line)) {
+      line = line.substr(0, line.find(comment));  // anything beyond # is comment
+      const size_t delimiterPos = line.find(delimiter);  // find the = sign
+      if (delimiterPos == std::string::npos) {
+        continue;  // no '=', so the line holds no name/value pair
+      }
+      std::string name = line.substr(0, delimiterPos);  // text before =
+      std::string value = line.substr(delimiterPos + 1);  // text after =
+
+      // Clean before the emptiness check so that whitespace-only names or
+      // values are treated as missing.
+      CleanString(&name);
+      CleanString(&value);
+      if (name.empty() || value.empty()) {
+        continue;
+      }
+      results.emplace_back(name, value);
+    }
+    return results;
+  }
+
+  ~ConfigParse() {
     fi_.close();
   }
 
  private:
   std::ifstream fi_;
+  std::string allowableChar_ =
+      "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-./\\";
+
+  /*!
+   * \brief strip leading and trailing characters that are not in
+   *  allowableChar_; characters between the first and last allowable
+   *  character are kept as they are.
+   * \param str string cleaned in place; becomes empty when it has no
+   *  allowable character
+   */
+  void CleanString(std::string * str) {
+    const size_t firstIndx = str->find_first_of(allowableChar_);
+    if (firstIndx == std::string::npos) {
+      // substr(npos, ...) would throw std::out_of_range.
+      str->clear();
+      return;
+    }
+    const size_t lastIndx = str->find_last_of(allowableChar_);
+    *str = str->substr(firstIndx, lastIndx - firstIndx + 1);
+  }
 };
 }  // namespace common
 }  // namespace xgboost