Extract string view. (#7416)
* Add equality operators. * Return a view in substr. * Add proper iterator types.
This commit is contained in:
parent
ca6f980932
commit
937fa282b5
@ -9,6 +9,7 @@
|
||||
#include <atomic>
|
||||
#include <cinttypes>
|
||||
#include <functional>
|
||||
#include <ostream>
|
||||
|
||||
namespace xgboost {
|
||||
/*!
|
||||
|
||||
@ -4,16 +4,17 @@
|
||||
#ifndef XGBOOST_JSON_H_
|
||||
#define XGBOOST_JSON_H_
|
||||
|
||||
#include <xgboost/intrusive_ptr.h>
|
||||
#include <xgboost/logging.h>
|
||||
#include <xgboost/parameter.h>
|
||||
#include <xgboost/intrusive_ptr.h>
|
||||
#include <xgboost/string_view.h>
|
||||
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
@ -298,43 +299,6 @@ class JsonBoolean : public Value {
|
||||
}
|
||||
};
|
||||
|
||||
struct StringView {
|
||||
private:
|
||||
using CharT = char; // unsigned char
|
||||
using Traits = std::char_traits<CharT>;
|
||||
CharT const* str_;
|
||||
size_t size_;
|
||||
|
||||
public:
|
||||
StringView() = default;
|
||||
StringView(CharT const* str, size_t size) : str_{str}, size_{size} {}
|
||||
explicit StringView(std::string const& str): str_{str.c_str()}, size_{str.size()} {}
|
||||
explicit StringView(CharT const* str) : str_{str}, size_{Traits::length(str)} {}
|
||||
|
||||
CharT const& operator[](size_t p) const { return str_[p]; }
|
||||
CharT const& at(size_t p) const { // NOLINT
|
||||
CHECK_LT(p, size_);
|
||||
return str_[p];
|
||||
}
|
||||
size_t size() const { return size_; } // NOLINT
|
||||
// Copies a portion of string. Since we don't have std::from_chars and friends here, so
|
||||
// copying substring is necessary for appending `\0`. It's not too bad since string by
|
||||
// default has small vector optimization, which is enabled by most if not all modern
|
||||
// compilers for numeric values.
|
||||
std::string substr(size_t beg, size_t n) const { // NOLINT
|
||||
CHECK_LE(beg, size_);
|
||||
return std::string {str_ + beg, n < (size_ - beg) ? n : (size_ - beg)};
|
||||
}
|
||||
CharT const* c_str() const { return str_; } // NOLINT
|
||||
|
||||
CharT const* cbegin() const { return str_; } // NOLINT
|
||||
CharT const* cend() const { return str_ + size(); } // NOLINT
|
||||
CharT const* begin() const { return str_; } // NOLINT
|
||||
CharT const* end() const { return str_ + size(); } // NOLINT
|
||||
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, StringView const v);
|
||||
|
||||
/*!
|
||||
* \brief Data structure representing JSON format.
|
||||
*
|
||||
@ -451,7 +415,7 @@ class Json {
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
bool IsA(Json const j) {
|
||||
bool IsA(Json const& j) {
|
||||
auto const& v = j.GetValue();
|
||||
return IsA<T>(&v);
|
||||
}
|
||||
|
||||
@ -84,12 +84,12 @@ class JsonReader {
|
||||
std::string msg = "Expecting: \"";
|
||||
msg += c;
|
||||
msg += "\", got: \"";
|
||||
if (got == -1) {
|
||||
if (got == EOF) {
|
||||
msg += "EOF\"";
|
||||
} else if (got == 0) {
|
||||
msg += "\\0\"";
|
||||
} else {
|
||||
msg += std::to_string(got) + " \"";
|
||||
msg += (got <= 127 ? std::string{got} : std::to_string(got)) + " \""; // NOLINT
|
||||
}
|
||||
Error(msg);
|
||||
}
|
||||
|
||||
81
include/xgboost/string_view.h
Normal file
81
include/xgboost/string_view.h
Normal file
@ -0,0 +1,81 @@
|
||||
/*!
|
||||
* Copyright 2021 by XGBoost Contributors
|
||||
*/
|
||||
#ifndef XGBOOST_STRING_VIEW_H_
|
||||
#define XGBOOST_STRING_VIEW_H_
|
||||
#include <xgboost/logging.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
|
||||
namespace xgboost {
|
||||
struct StringView {
|
||||
private:
|
||||
using CharT = char; // unsigned char
|
||||
using Traits = std::char_traits<CharT>;
|
||||
CharT const* str_{nullptr};
|
||||
size_t size_{0};
|
||||
|
||||
public:
|
||||
using iterator = const CharT*; // NOLINT
|
||||
using const_iterator = iterator; // NOLINT
|
||||
using reverse_iterator = std::reverse_iterator<const_iterator>; // NOLINT
|
||||
using const_reverse_iterator = reverse_iterator; // NOLINT
|
||||
|
||||
public:
|
||||
constexpr StringView() = default;
|
||||
constexpr StringView(CharT const* str, size_t size) : str_{str}, size_{size} {}
|
||||
explicit StringView(std::string const& str) : str_{str.c_str()}, size_{str.size()} {}
|
||||
StringView(CharT const* str) : str_{str}, size_{Traits::length(str)} {} // NOLINT
|
||||
|
||||
CharT const& operator[](size_t p) const { return str_[p]; }
|
||||
CharT const& at(size_t p) const { // NOLINT
|
||||
CHECK_LT(p, size_);
|
||||
return str_[p];
|
||||
}
|
||||
constexpr size_t size() const { return size_; } // NOLINT
|
||||
StringView substr(size_t beg, size_t n) const { // NOLINT
|
||||
CHECK_LE(beg, size_);
|
||||
size_t len = std::min(n, size_ - beg);
|
||||
return {str_ + beg, len};
|
||||
}
|
||||
CharT const* c_str() const { return str_; } // NOLINT
|
||||
|
||||
constexpr CharT const* cbegin() const { return str_; } // NOLINT
|
||||
constexpr CharT const* cend() const { return str_ + size(); } // NOLINT
|
||||
constexpr CharT const* begin() const { return str_; } // NOLINT
|
||||
constexpr CharT const* end() const { return str_ + size(); } // NOLINT
|
||||
|
||||
const_reverse_iterator rbegin() const noexcept { // NOLINT
|
||||
return const_reverse_iterator(this->end());
|
||||
}
|
||||
const_reverse_iterator crbegin() const noexcept { // NOLINT
|
||||
return const_reverse_iterator(this->end());
|
||||
}
|
||||
const_reverse_iterator rend() const noexcept { // NOLINT
|
||||
return const_reverse_iterator(this->begin());
|
||||
}
|
||||
const_reverse_iterator crend() const noexcept { // NOLINT
|
||||
return const_reverse_iterator(this->begin());
|
||||
}
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, StringView const v) {
|
||||
for (auto c : v) {
|
||||
os.put(c);
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
inline bool operator==(StringView l, StringView r) {
|
||||
if (l.size() != r.size()) {
|
||||
return false;
|
||||
}
|
||||
return std::equal(l.cbegin(), l.cend(), r.cbegin());
|
||||
}
|
||||
|
||||
inline bool operator!=(StringView l, StringView r) { return !(l == r); }
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_STRING_VIEW_H_
|
||||
@ -1,19 +1,21 @@
|
||||
/*!
|
||||
* Copyright (c) by Contributors 2019-2021
|
||||
*/
|
||||
#include "xgboost/json.h"
|
||||
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <locale>
|
||||
#include <sstream>
|
||||
#include <limits>
|
||||
#include <cmath>
|
||||
|
||||
#include "charconv.h"
|
||||
#include "xgboost/base.h"
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/json.h"
|
||||
#include "xgboost/json_io.h"
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/string_view.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
@ -416,7 +418,8 @@ Json JsonReader::Load() {
|
||||
|
||||
void JsonReader::Error(std::string msg) const {
|
||||
// just copy it.
|
||||
std::istringstream str_s(raw_str_.substr(0, raw_str_.size()));
|
||||
std::stringstream str_s;
|
||||
str_s << raw_str_.substr(0, raw_str_.size());
|
||||
|
||||
msg += ", around character position: " + std::to_string(cursor_.Pos());
|
||||
msg += '\n';
|
||||
@ -431,7 +434,7 @@ void JsonReader::Error(std::string msg) const {
|
||||
auto end = cursor_.Pos() + kExtend >= raw_str_.size() ?
|
||||
raw_str_.size() : cursor_.Pos() + kExtend;
|
||||
|
||||
std::string const& raw_portion = raw_str_.substr(beg, end - beg);
|
||||
auto raw_portion = raw_str_.substr(beg, end - beg);
|
||||
std::string portion;
|
||||
for (auto c : raw_portion) {
|
||||
if (c == '\n') {
|
||||
@ -745,13 +748,6 @@ void Json::Dump(Json json, std::string* str) {
|
||||
|
||||
Json& Json::operator=(Json const &other) = default;
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, StringView const v) {
|
||||
for (auto c : v) {
|
||||
os.put(c);
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
static_assert(std::is_nothrow_move_constructible<Json>::value, "");
|
||||
static_assert(std::is_nothrow_move_constructible<Object>::value, "");
|
||||
static_assert(std::is_nothrow_move_constructible<Array>::value, "");
|
||||
|
||||
@ -459,8 +459,8 @@ TEST(Json, Invalid) {
|
||||
Json load{Json::Load(StringView(str.c_str(), str.size()))};
|
||||
} catch (dmlc::Error const& e) {
|
||||
std::string msg = e.what();
|
||||
ASSERT_TRUE(msg.find("EOF") != std::string::npos
|
||||
|| msg.find("255") != std::string::npos); // EOF is printed as 255 on s390x
|
||||
// EOF is printed as 255 on s390x
|
||||
ASSERT_TRUE(msg.find("EOF") != std::string::npos || msg.find("255") != std::string::npos);
|
||||
has_thrown = true;
|
||||
};
|
||||
ASSERT_TRUE(has_thrown);
|
||||
@ -580,14 +580,4 @@ TEST(Json, DISABLED_RoundTripExhaustive) {
|
||||
test(static_cast<uint32_t>(i));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(StringView, Basic) {
|
||||
StringView str{"This is a string."};
|
||||
std::stringstream ss;
|
||||
ss << str;
|
||||
|
||||
std::string res = ss.str();
|
||||
ASSERT_EQ(str.size(), res.size());
|
||||
ASSERT_TRUE(std::equal(res.cbegin(), res.cend(), str.cbegin()));
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
28
tests/cpp/common/test_string_view.cc
Normal file
28
tests/cpp/common/test_string_view.cc
Normal file
@ -0,0 +1,28 @@
|
||||
/*!
|
||||
* Copyright (c) by XGBoost Contributors 2021
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/string_view.h>
|
||||
#include <string_view>
|
||||
namespace xgboost {
|
||||
TEST(StringView, Basic) {
|
||||
StringView str{"This is a string."};
|
||||
std::stringstream ss;
|
||||
ss << str;
|
||||
|
||||
std::string res = ss.str();
|
||||
ASSERT_EQ(str.size(), res.size());
|
||||
ASSERT_TRUE(std::equal(res.cbegin(), res.cend(), str.cbegin()));
|
||||
|
||||
auto substr = str.substr(5, 2);
|
||||
ASSERT_EQ(substr.size(), 2);
|
||||
|
||||
ASSERT_EQ(StringView{"is"}.size(), 2);
|
||||
ASSERT_TRUE(substr == "is");
|
||||
ASSERT_FALSE(substr != "is");
|
||||
ASSERT_FALSE(substr == "foobar");
|
||||
ASSERT_FALSE(substr == "i");
|
||||
|
||||
ASSERT_TRUE(std::equal(substr.crbegin(), substr.crend(), StringView{"si"}.cbegin()));
|
||||
}
|
||||
} // namespace xgboost
|
||||
Loading…
x
Reference in New Issue
Block a user