Extract string view. (#7416)

* Add equality operators.
* Return a view in substr.
* Add proper iterator types.
This commit is contained in:
Jiaming Yuan 2021-11-12 18:22:30 +08:00 committed by GitHub
parent ca6f980932
commit 937fa282b5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 130 additions and 70 deletions

View File

@ -9,6 +9,7 @@
#include <atomic> #include <atomic>
#include <cinttypes> #include <cinttypes>
#include <functional> #include <functional>
#include <ostream>
namespace xgboost { namespace xgboost {
/*! /*!

View File

@ -4,16 +4,17 @@
#ifndef XGBOOST_JSON_H_ #ifndef XGBOOST_JSON_H_
#define XGBOOST_JSON_H_ #define XGBOOST_JSON_H_
#include <xgboost/intrusive_ptr.h>
#include <xgboost/logging.h> #include <xgboost/logging.h>
#include <xgboost/parameter.h> #include <xgboost/parameter.h>
#include <xgboost/intrusive_ptr.h> #include <xgboost/string_view.h>
#include <functional>
#include <map> #include <map>
#include <memory> #include <memory>
#include <vector>
#include <functional>
#include <utility>
#include <string> #include <string>
#include <utility>
#include <vector>
namespace xgboost { namespace xgboost {
@ -298,43 +299,6 @@ class JsonBoolean : public Value {
} }
}; };
struct StringView {
private:
using CharT = char; // unsigned char
using Traits = std::char_traits<CharT>;
CharT const* str_;
size_t size_;
public:
StringView() = default;
StringView(CharT const* str, size_t size) : str_{str}, size_{size} {}
explicit StringView(std::string const& str): str_{str.c_str()}, size_{str.size()} {}
explicit StringView(CharT const* str) : str_{str}, size_{Traits::length(str)} {}
CharT const& operator[](size_t p) const { return str_[p]; }
CharT const& at(size_t p) const { // NOLINT
CHECK_LT(p, size_);
return str_[p];
}
size_t size() const { return size_; } // NOLINT
// Copies a portion of string. Since we don't have std::from_chars and friends here, so
// copying substring is necessary for appending `\0`. It's not too bad since string by
// default has small vector optimization, which is enabled by most if not all modern
// compilers for numeric values.
std::string substr(size_t beg, size_t n) const { // NOLINT
CHECK_LE(beg, size_);
return std::string {str_ + beg, n < (size_ - beg) ? n : (size_ - beg)};
}
CharT const* c_str() const { return str_; } // NOLINT
CharT const* cbegin() const { return str_; } // NOLINT
CharT const* cend() const { return str_ + size(); } // NOLINT
CharT const* begin() const { return str_; } // NOLINT
CharT const* end() const { return str_ + size(); } // NOLINT
};
std::ostream &operator<<(std::ostream &os, StringView const v);
/*! /*!
* \brief Data structure representing JSON format. * \brief Data structure representing JSON format.
* *
@ -451,7 +415,7 @@ class Json {
}; };
template <typename T> template <typename T>
bool IsA(Json const j) { bool IsA(Json const& j) {
auto const& v = j.GetValue(); auto const& v = j.GetValue();
return IsA<T>(&v); return IsA<T>(&v);
} }

View File

@ -84,12 +84,12 @@ class JsonReader {
std::string msg = "Expecting: \""; std::string msg = "Expecting: \"";
msg += c; msg += c;
msg += "\", got: \""; msg += "\", got: \"";
if (got == -1) { if (got == EOF) {
msg += "EOF\""; msg += "EOF\"";
} else if (got == 0) { } else if (got == 0) {
msg += "\\0\""; msg += "\\0\"";
} else { } else {
msg += std::to_string(got) + " \""; msg += (got <= 127 ? std::string{got} : std::to_string(got)) + " \""; // NOLINT
} }
Error(msg); Error(msg);
} }

View File

@ -0,0 +1,81 @@
/*!
* Copyright 2021 by XGBoost Contributors
*/
#ifndef XGBOOST_STRING_VIEW_H_
#define XGBOOST_STRING_VIEW_H_
#include <xgboost/logging.h>
#include <algorithm>
#include <iterator>
#include <ostream>
#include <string>
namespace xgboost {
struct StringView {
private:
using CharT = char; // unsigned char
using Traits = std::char_traits<CharT>;
CharT const* str_{nullptr};
size_t size_{0};
public:
using iterator = const CharT*; // NOLINT
using const_iterator = iterator; // NOLINT
using reverse_iterator = std::reverse_iterator<const_iterator>; // NOLINT
using const_reverse_iterator = reverse_iterator; // NOLINT
public:
constexpr StringView() = default;
constexpr StringView(CharT const* str, size_t size) : str_{str}, size_{size} {}
explicit StringView(std::string const& str) : str_{str.c_str()}, size_{str.size()} {}
StringView(CharT const* str) : str_{str}, size_{Traits::length(str)} {} // NOLINT
CharT const& operator[](size_t p) const { return str_[p]; }
CharT const& at(size_t p) const { // NOLINT
CHECK_LT(p, size_);
return str_[p];
}
constexpr size_t size() const { return size_; } // NOLINT
StringView substr(size_t beg, size_t n) const { // NOLINT
CHECK_LE(beg, size_);
size_t len = std::min(n, size_ - beg);
return {str_ + beg, len};
}
CharT const* c_str() const { return str_; } // NOLINT
constexpr CharT const* cbegin() const { return str_; } // NOLINT
constexpr CharT const* cend() const { return str_ + size(); } // NOLINT
constexpr CharT const* begin() const { return str_; } // NOLINT
constexpr CharT const* end() const { return str_ + size(); } // NOLINT
const_reverse_iterator rbegin() const noexcept { // NOLINT
return const_reverse_iterator(this->end());
}
const_reverse_iterator crbegin() const noexcept { // NOLINT
return const_reverse_iterator(this->end());
}
const_reverse_iterator rend() const noexcept { // NOLINT
return const_reverse_iterator(this->begin());
}
const_reverse_iterator crend() const noexcept { // NOLINT
return const_reverse_iterator(this->begin());
}
};
inline std::ostream& operator<<(std::ostream& os, StringView const v) {
for (auto c : v) {
os.put(c);
}
return os;
}
inline bool operator==(StringView l, StringView r) {
if (l.size() != r.size()) {
return false;
}
return std::equal(l.cbegin(), l.cend(), r.cbegin());
}
inline bool operator!=(StringView l, StringView r) { return !(l == r); }
} // namespace xgboost
#endif // XGBOOST_STRING_VIEW_H_

View File

@ -1,19 +1,21 @@
/*! /*!
* Copyright (c) by Contributors 2019-2021 * Copyright (c) by Contributors 2019-2021
*/ */
#include "xgboost/json.h"
#include <cctype> #include <cctype>
#include <cmath>
#include <cstddef> #include <cstddef>
#include <iterator> #include <iterator>
#include <limits>
#include <locale> #include <locale>
#include <sstream> #include <sstream>
#include <limits>
#include <cmath>
#include "charconv.h" #include "charconv.h"
#include "xgboost/base.h" #include "xgboost/base.h"
#include "xgboost/logging.h"
#include "xgboost/json.h"
#include "xgboost/json_io.h" #include "xgboost/json_io.h"
#include "xgboost/logging.h"
#include "xgboost/string_view.h"
namespace xgboost { namespace xgboost {
@ -416,7 +418,8 @@ Json JsonReader::Load() {
void JsonReader::Error(std::string msg) const { void JsonReader::Error(std::string msg) const {
// just copy it. // just copy it.
std::istringstream str_s(raw_str_.substr(0, raw_str_.size())); std::stringstream str_s;
str_s << raw_str_.substr(0, raw_str_.size());
msg += ", around character position: " + std::to_string(cursor_.Pos()); msg += ", around character position: " + std::to_string(cursor_.Pos());
msg += '\n'; msg += '\n';
@ -431,7 +434,7 @@ void JsonReader::Error(std::string msg) const {
auto end = cursor_.Pos() + kExtend >= raw_str_.size() ? auto end = cursor_.Pos() + kExtend >= raw_str_.size() ?
raw_str_.size() : cursor_.Pos() + kExtend; raw_str_.size() : cursor_.Pos() + kExtend;
std::string const& raw_portion = raw_str_.substr(beg, end - beg); auto raw_portion = raw_str_.substr(beg, end - beg);
std::string portion; std::string portion;
for (auto c : raw_portion) { for (auto c : raw_portion) {
if (c == '\n') { if (c == '\n') {
@ -745,13 +748,6 @@ void Json::Dump(Json json, std::string* str) {
Json& Json::operator=(Json const &other) = default; Json& Json::operator=(Json const &other) = default;
std::ostream &operator<<(std::ostream &os, StringView const v) {
for (auto c : v) {
os.put(c);
}
return os;
}
static_assert(std::is_nothrow_move_constructible<Json>::value, ""); static_assert(std::is_nothrow_move_constructible<Json>::value, "");
static_assert(std::is_nothrow_move_constructible<Object>::value, ""); static_assert(std::is_nothrow_move_constructible<Object>::value, "");
static_assert(std::is_nothrow_move_constructible<Array>::value, ""); static_assert(std::is_nothrow_move_constructible<Array>::value, "");

View File

@ -457,10 +457,10 @@ TEST(Json, Invalid) {
bool has_thrown = false; bool has_thrown = false;
try { try {
Json load{Json::Load(StringView(str.c_str(), str.size()))}; Json load{Json::Load(StringView(str.c_str(), str.size()))};
} catch (dmlc::Error const &e) { } catch (dmlc::Error const& e) {
std::string msg = e.what(); std::string msg = e.what();
ASSERT_TRUE(msg.find("EOF") != std::string::npos // EOF is printed as 255 on s390x
|| msg.find("255") != std::string::npos); // EOF is printed as 255 on s390x ASSERT_TRUE(msg.find("EOF") != std::string::npos || msg.find("255") != std::string::npos);
has_thrown = true; has_thrown = true;
}; };
ASSERT_TRUE(has_thrown); ASSERT_TRUE(has_thrown);
@ -580,14 +580,4 @@ TEST(Json, DISABLED_RoundTripExhaustive) {
test(static_cast<uint32_t>(i)); test(static_cast<uint32_t>(i));
} }
} }
TEST(StringView, Basic) {
StringView str{"This is a string."};
std::stringstream ss;
ss << str;
std::string res = ss.str();
ASSERT_EQ(str.size(), res.size());
ASSERT_TRUE(std::equal(res.cbegin(), res.cend(), str.cbegin()));
}
} // namespace xgboost } // namespace xgboost

View File

@ -0,0 +1,28 @@
/*!
* Copyright (c) by XGBoost Contributors 2021
*/
#include <gtest/gtest.h>
#include <xgboost/string_view.h>
#include <string_view>
namespace xgboost {
TEST(StringView, Basic) {
StringView str{"This is a string."};
std::stringstream ss;
ss << str;
std::string res = ss.str();
ASSERT_EQ(str.size(), res.size());
ASSERT_TRUE(std::equal(res.cbegin(), res.cend(), str.cbegin()));
auto substr = str.substr(5, 2);
ASSERT_EQ(substr.size(), 2);
ASSERT_EQ(StringView{"is"}.size(), 2);
ASSERT_TRUE(substr == "is");
ASSERT_FALSE(substr != "is");
ASSERT_FALSE(substr == "foobar");
ASSERT_FALSE(substr == "i");
ASSERT_TRUE(std::equal(substr.crbegin(), substr.crend(), StringView{"si"}.cbegin()));
}
} // namespace xgboost