From 937fa282b52cbf1b916d8a3d68b8cd6de82945fa Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 12 Nov 2021 18:22:30 +0800 Subject: [PATCH] Extract string view. (#7416) * Add equality operators. * Return a view in substr. * Add proper iterator types. --- include/xgboost/intrusive_ptr.h | 1 + include/xgboost/json.h | 48 +++-------------- include/xgboost/json_io.h | 4 +- include/xgboost/string_view.h | 81 ++++++++++++++++++++++++++++ src/common/json.cc | 22 ++++---- tests/cpp/common/test_json.cc | 16 ++---- tests/cpp/common/test_string_view.cc | 28 ++++++++++ 7 files changed, 130 insertions(+), 70 deletions(-) create mode 100644 include/xgboost/string_view.h create mode 100644 tests/cpp/common/test_string_view.cc diff --git a/include/xgboost/intrusive_ptr.h b/include/xgboost/intrusive_ptr.h index 9ebadb24b..1c58704c4 100644 --- a/include/xgboost/intrusive_ptr.h +++ b/include/xgboost/intrusive_ptr.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace xgboost { /*! diff --git a/include/xgboost/json.h b/include/xgboost/json.h index ab6ba6ee1..62f5ad8f6 100644 --- a/include/xgboost/json.h +++ b/include/xgboost/json.h @@ -4,16 +4,17 @@ #ifndef XGBOOST_JSON_H_ #define XGBOOST_JSON_H_ +#include #include #include -#include +#include +#include #include #include -#include -#include -#include #include +#include +#include namespace xgboost { @@ -298,43 +299,6 @@ class JsonBoolean : public Value { } }; -struct StringView { - private: - using CharT = char; // unsigned char - using Traits = std::char_traits; - CharT const* str_; - size_t size_; - - public: - StringView() = default; - StringView(CharT const* str, size_t size) : str_{str}, size_{size} {} - explicit StringView(std::string const& str): str_{str.c_str()}, size_{str.size()} {} - explicit StringView(CharT const* str) : str_{str}, size_{Traits::length(str)} {} - - CharT const& operator[](size_t p) const { return str_[p]; } - CharT const& at(size_t p) const { // NOLINT - CHECK_LT(p, size_); - return str_[p]; - } - size_t size() const { return size_; } // NOLINT - // Copies a portion of string. Since we don't have std::from_chars and friends here, so - // copying substring is necessary for appending `\0`. It's not too bad since string by - // default has small vector optimization, which is enabled by most if not all modern - // compilers for numeric values. - std::string substr(size_t beg, size_t n) const { // NOLINT - CHECK_LE(beg, size_); - return std::string {str_ + beg, n < (size_ - beg) ? n : (size_ - beg)}; - } - CharT const* c_str() const { return str_; } // NOLINT - - CharT const* cbegin() const { return str_; } // NOLINT - CharT const* cend() const { return str_ + size(); } // NOLINT - CharT const* begin() const { return str_; } // NOLINT - CharT const* end() const { return str_ + size(); } // NOLINT -}; - -std::ostream &operator<<(std::ostream &os, StringView const v); - /*! * \brief Data structure representing JSON format. * @@ -451,7 +415,7 @@ class Json { }; template -bool IsA(Json const j) { +bool IsA(Json const& j) { auto const& v = j.GetValue(); return IsA(&v); } diff --git a/include/xgboost/json_io.h b/include/xgboost/json_io.h index 12118e23a..d781ecd4f 100644 --- a/include/xgboost/json_io.h +++ b/include/xgboost/json_io.h @@ -84,12 +84,12 @@ class JsonReader { std::string msg = "Expecting: \""; msg += c; msg += "\", got: \""; - if (got == -1) { + if (got == EOF) { msg += "EOF\""; } else if (got == 0) { msg += "\\0\""; } else { - msg += std::to_string(got) + " \""; + msg += (got <= 127 ? std::string{got} : std::to_string(got)) + " \""; // NOLINT } Error(msg); } diff --git a/include/xgboost/string_view.h b/include/xgboost/string_view.h new file mode 100644 index 000000000..aee52f7a5 --- /dev/null +++ b/include/xgboost/string_view.h @@ -0,0 +1,81 @@ +/*! + * Copyright 2021 by XGBoost Contributors + */ +#ifndef XGBOOST_STRING_VIEW_H_ +#define XGBOOST_STRING_VIEW_H_ +#include + +#include +#include +#include +#include + +namespace xgboost { +struct StringView { + private: + using CharT = char; // unsigned char + using Traits = std::char_traits; + CharT const* str_{nullptr}; + size_t size_{0}; + + public: + using iterator = const CharT*; // NOLINT + using const_iterator = iterator; // NOLINT + using reverse_iterator = std::reverse_iterator; // NOLINT + using const_reverse_iterator = reverse_iterator; // NOLINT + + public: + constexpr StringView() = default; + constexpr StringView(CharT const* str, size_t size) : str_{str}, size_{size} {} + explicit StringView(std::string const& str) : str_{str.c_str()}, size_{str.size()} {} + StringView(CharT const* str) : str_{str}, size_{Traits::length(str)} {} // NOLINT + + CharT const& operator[](size_t p) const { return str_[p]; } + CharT const& at(size_t p) const { // NOLINT + CHECK_LT(p, size_); + return str_[p]; + } + constexpr size_t size() const { return size_; } // NOLINT + StringView substr(size_t beg, size_t n) const { // NOLINT + CHECK_LE(beg, size_); + size_t len = std::min(n, size_ - beg); + return {str_ + beg, len}; + } + CharT const* c_str() const { return str_; } // NOLINT + + constexpr CharT const* cbegin() const { return str_; } // NOLINT + constexpr CharT const* cend() const { return str_ + size(); } // NOLINT + constexpr CharT const* begin() const { return str_; } // NOLINT + constexpr CharT const* end() const { return str_ + size(); } // NOLINT + + const_reverse_iterator rbegin() const noexcept { // NOLINT + return const_reverse_iterator(this->end()); + } + const_reverse_iterator crbegin() const noexcept { // NOLINT + return const_reverse_iterator(this->end()); + } + const_reverse_iterator rend() const noexcept { // NOLINT + return const_reverse_iterator(this->begin()); + } + const_reverse_iterator crend() const noexcept { // NOLINT + return const_reverse_iterator(this->begin()); + } +}; + +inline std::ostream& operator<<(std::ostream& os, StringView const v) { + for (auto c : v) { + os.put(c); + } + return os; +} + +inline bool operator==(StringView l, StringView r) { + if (l.size() != r.size()) { + return false; + } + return std::equal(l.cbegin(), l.cend(), r.cbegin()); +} + +inline bool operator!=(StringView l, StringView r) { return !(l == r); } +} // namespace xgboost +#endif // XGBOOST_STRING_VIEW_H_ diff --git a/src/common/json.cc b/src/common/json.cc index 8d6e8fc2f..924f1cecd 100644 --- a/src/common/json.cc +++ b/src/common/json.cc @@ -1,19 +1,21 @@ /*! * Copyright (c) by Contributors 2019-2021 */ +#include "xgboost/json.h" + #include +#include #include #include +#include #include #include -#include -#include #include "charconv.h" #include "xgboost/base.h" -#include "xgboost/logging.h" -#include "xgboost/json.h" #include "xgboost/json_io.h" +#include "xgboost/logging.h" +#include "xgboost/string_view.h" namespace xgboost { @@ -416,7 +418,8 @@ Json JsonReader::Load() { void JsonReader::Error(std::string msg) const { // just copy it. - std::istringstream str_s(raw_str_.substr(0, raw_str_.size())); + std::stringstream str_s; + str_s << raw_str_.substr(0, raw_str_.size()); msg += ", around character position: " + std::to_string(cursor_.Pos()); msg += '\n'; @@ -431,7 +434,7 @@ void JsonReader::Error(std::string msg) const { auto end = cursor_.Pos() + kExtend >= raw_str_.size() ? raw_str_.size() : cursor_.Pos() + kExtend; - std::string const& raw_portion = raw_str_.substr(beg, end - beg); + auto raw_portion = raw_str_.substr(beg, end - beg); std::string portion; for (auto c : raw_portion) { if (c == '\n') { @@ -745,13 +748,6 @@ void Json::Dump(Json json, std::string* str) { Json& Json::operator=(Json const &other) = default; -std::ostream &operator<<(std::ostream &os, StringView const v) { - for (auto c : v) { - os.put(c); - } - return os; -} - static_assert(std::is_nothrow_move_constructible::value, ""); static_assert(std::is_nothrow_move_constructible::value, ""); static_assert(std::is_nothrow_move_constructible::value, ""); diff --git a/tests/cpp/common/test_json.cc b/tests/cpp/common/test_json.cc index d220e0444..363321709 100644 --- a/tests/cpp/common/test_json.cc +++ b/tests/cpp/common/test_json.cc @@ -457,10 +457,10 @@ TEST(Json, Invalid) { bool has_thrown = false; try { Json load{Json::Load(StringView(str.c_str(), str.size()))}; - } catch (dmlc::Error const &e) { + } catch (dmlc::Error const& e) { std::string msg = e.what(); - ASSERT_TRUE(msg.find("EOF") != std::string::npos - || msg.find("255") != std::string::npos); // EOF is printed as 255 on s390x + // EOF is printed as 255 on s390x + ASSERT_TRUE(msg.find("EOF") != std::string::npos || msg.find("255") != std::string::npos); has_thrown = true; }; ASSERT_TRUE(has_thrown); @@ -580,14 +580,4 @@ TEST(Json, DISABLED_RoundTripExhaustive) { test(static_cast(i)); } } - -TEST(StringView, Basic) { - StringView str{"This is a string."}; - std::stringstream ss; - ss << str; - - std::string res = ss.str(); - ASSERT_EQ(str.size(), res.size()); - ASSERT_TRUE(std::equal(res.cbegin(), res.cend(), str.cbegin())); -} } // namespace xgboost diff --git a/tests/cpp/common/test_string_view.cc b/tests/cpp/common/test_string_view.cc new file mode 100644 index 000000000..b2ba24c71 --- /dev/null +++ b/tests/cpp/common/test_string_view.cc @@ -0,0 +1,28 @@ +/*! + * Copyright (c) by XGBoost Contributors 2021 + */ +#include +#include +#include +namespace xgboost { +TEST(StringView, Basic) { + StringView str{"This is a string."}; + std::stringstream ss; + ss << str; + + std::string res = ss.str(); + ASSERT_EQ(str.size(), res.size()); + ASSERT_TRUE(std::equal(res.cbegin(), res.cend(), str.cbegin())); + + auto substr = str.substr(5, 2); + ASSERT_EQ(substr.size(), 2); + + ASSERT_EQ(StringView{"is"}.size(), 2); + ASSERT_TRUE(substr == "is"); + ASSERT_FALSE(substr != "is"); + ASSERT_FALSE(substr == "foobar"); + ASSERT_FALSE(substr == "i"); + + ASSERT_TRUE(std::equal(substr.crbegin(), substr.crend(), StringView{"si"}.cbegin())); +} +} // namespace xgboost