// File indexing completed on 2024-05-05 05:50:42
0001 /* 0002 SPDX-FileCopyrightText: 2020-2021 Klarälvdalens Datakonsult AB a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> 0003 0004 This file is part of KDToolBox (https://github.com/KDAB/KDToolBox). 0005 0006 SPDX-License-Identifier: MIT 0007 */ 0008 0009 #ifndef QSTRINGTOKENIZER_H 0010 #define QSTRINGTOKENIZER_H 0011 0012 #include <qnamespace.h> 0013 0014 QT_BEGIN_NAMESPACE 0015 0016 template<typename, typename> 0017 class QStringBuilder; 0018 template<typename> 0019 class QList; 0020 0021 QT_END_NAMESPACE 0022 0023 #if defined(Q_QDOC) || (defined(__cpp_range_based_for) && __cpp_range_based_for >= 201603) 0024 #define Q_STRINGTOKENIZER_USE_SENTINEL 0025 #endif 0026 0027 class QStringTokenizerBaseBase 0028 { 0029 protected: 0030 ~QStringTokenizerBaseBase() = default; 0031 Q_DECL_CONSTEXPR QStringTokenizerBaseBase(Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept 0032 : m_sb{sb} 0033 , m_cs{cs} 0034 { 0035 } 0036 0037 struct tokenizer_state { 0038 qsizetype start, end, extra; 0039 friend constexpr bool operator==(tokenizer_state lhs, tokenizer_state rhs) noexcept 0040 { 0041 return lhs.start == rhs.start && lhs.end == rhs.end && lhs.extra == rhs.extra; 0042 } 0043 friend constexpr bool operator!=(tokenizer_state lhs, tokenizer_state rhs) noexcept 0044 { 0045 return !operator==(lhs, rhs); 0046 } 0047 }; 0048 0049 Qt::SplitBehavior m_sb; 0050 Qt::CaseSensitivity m_cs; 0051 }; 0052 0053 template<typename Haystack, typename Needle> 0054 class QStringTokenizerBase : protected QStringTokenizerBaseBase 0055 { 0056 struct next_result { 0057 Haystack value; 0058 bool ok; 0059 tokenizer_state state; 0060 }; 0061 inline next_result next(tokenizer_state state) const noexcept; 0062 inline next_result toFront() const noexcept 0063 { 0064 return next({}); 0065 } 0066 0067 public: 0068 constexpr explicit QStringTokenizerBase(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept 0069 : QStringTokenizerBaseBase{sb, 
cs} 0070 , m_haystack{haystack} 0071 , m_needle{needle} 0072 { 0073 } 0074 0075 class iterator; 0076 friend class iterator; 0077 #ifdef Q_STRINGTOKENIZER_USE_SENTINEL 0078 class sentinel 0079 { 0080 friend constexpr bool operator==(sentinel, sentinel) noexcept 0081 { 0082 return true; 0083 } 0084 friend constexpr bool operator!=(sentinel, sentinel) noexcept 0085 { 0086 return false; 0087 } 0088 }; 0089 #else 0090 using sentinel = iterator; 0091 #endif 0092 class iterator 0093 { 0094 const QStringTokenizerBase *tokenizer; 0095 next_result current; 0096 friend class QStringTokenizerBase; 0097 explicit iterator(const QStringTokenizerBase &t) noexcept 0098 : tokenizer{&t} 0099 , current{t.toFront()} 0100 { 0101 } 0102 0103 public: 0104 using difference_type = qsizetype; 0105 using value_type = Haystack; 0106 using pointer = const value_type *; 0107 using reference = const value_type &; 0108 using iterator_category = std::forward_iterator_tag; 0109 0110 iterator() noexcept = default; 0111 0112 // violates std::forward_iterator (returns a reference into the iterator) 0113 Q_REQUIRED_RESULT constexpr const Haystack *operator->() const 0114 { 0115 return Q_ASSERT(current.ok), ¤t.value; 0116 } 0117 Q_REQUIRED_RESULT constexpr const Haystack &operator*() const 0118 { 0119 return *operator->(); 0120 } 0121 0122 iterator &operator++() 0123 { 0124 advance(); 0125 return *this; 0126 } 0127 iterator operator++(int) 0128 { 0129 auto tmp = *this; 0130 advance(); 0131 return tmp; 0132 } 0133 0134 friend constexpr bool operator==(const iterator &lhs, const iterator &rhs) noexcept 0135 { 0136 return lhs.current.ok == rhs.current.ok && (!lhs.current.ok || (Q_ASSERT(lhs.tokenizer == rhs.tokenizer), lhs.current.state == rhs.current.state)); 0137 } 0138 friend constexpr bool operator!=(const iterator &lhs, const iterator &rhs) noexcept 0139 { 0140 return !operator==(lhs, rhs); 0141 } 0142 #ifdef Q_STRINGTOKENIZER_USE_SENTINEL 0143 friend constexpr bool operator==(const iterator &lhs, 
sentinel) noexcept 0144 { 0145 return !lhs.current.ok; 0146 } 0147 friend constexpr bool operator!=(const iterator &lhs, sentinel) noexcept 0148 { 0149 return !operator==(lhs, sentinel{}); 0150 } 0151 friend constexpr bool operator==(sentinel, const iterator &rhs) noexcept 0152 { 0153 return !rhs.current.ok; 0154 } 0155 friend constexpr bool operator!=(sentinel, const iterator &rhs) noexcept 0156 { 0157 return !operator==(sentinel{}, rhs); 0158 } 0159 #endif 0160 private: 0161 void advance() 0162 { 0163 Q_ASSERT(current.ok); 0164 current = tokenizer->next(current.state); 0165 } 0166 }; 0167 using const_iterator = iterator; 0168 0169 using size_type = std::size_t; 0170 using difference_type = typename iterator::difference_type; 0171 using value_type = typename iterator::value_type; 0172 using pointer = typename iterator::pointer; 0173 using const_pointer = pointer; 0174 using reference = typename iterator::reference; 0175 using const_reference = reference; 0176 0177 Q_REQUIRED_RESULT iterator begin() const noexcept 0178 { 0179 return iterator{*this}; 0180 } 0181 Q_REQUIRED_RESULT iterator cbegin() const noexcept 0182 { 0183 return begin(); 0184 } 0185 template<bool = std::is_same<iterator, sentinel>::value> // ODR protection 0186 Q_REQUIRED_RESULT constexpr sentinel end() const noexcept 0187 { 0188 return {}; 0189 } 0190 template<bool = std::is_same<iterator, sentinel>::value> // ODR protection 0191 Q_REQUIRED_RESULT constexpr sentinel cend() const noexcept 0192 { 0193 return {}; 0194 } 0195 0196 private: 0197 Haystack m_haystack; 0198 Needle m_needle; 0199 }; 0200 0201 #include <qstringview.h> 0202 0203 namespace QtPrivate 0204 { 0205 namespace Tok 0206 { 0207 Q_DECL_CONSTEXPR qsizetype size(QChar) noexcept 0208 { 0209 return 1; 0210 } 0211 template<typename String> 0212 constexpr qsizetype size(const String &s) noexcept 0213 { 0214 return static_cast<qsizetype>(s.size()); 0215 } 0216 0217 template<typename String> 0218 struct ViewForImpl { 0219 }; 0220 template<> 
0221 struct ViewForImpl<QStringView> { 0222 using type = QStringView; 0223 }; 0224 template<> 0225 struct ViewForImpl<QLatin1String> { 0226 using type = QLatin1String; 0227 }; 0228 template<> 0229 struct ViewForImpl<QChar> { 0230 using type = QChar; 0231 }; 0232 template<> 0233 struct ViewForImpl<QString> : ViewForImpl<QStringView> { 0234 }; 0235 template<> 0236 struct ViewForImpl<QLatin1Char> : ViewForImpl<QChar> { 0237 }; 0238 template<> 0239 struct ViewForImpl<char16_t> : ViewForImpl<QChar> { 0240 }; 0241 template<> 0242 struct ViewForImpl<char16_t *> : ViewForImpl<QStringView> { 0243 }; 0244 template<> 0245 struct ViewForImpl<const char16_t *> : ViewForImpl<QStringView> { 0246 }; 0247 template<typename LHS, typename RHS> 0248 struct ViewForImpl<QStringBuilder<LHS, RHS>> : ViewForImpl<typename QStringBuilder<LHS, RHS>::ConvertTo> { 0249 }; 0250 template<typename Char, typename... Args> 0251 struct ViewForImpl<std::basic_string<Char, Args...>> : ViewForImpl<Char *> { 0252 }; 0253 #ifdef __cpp_lib_string_view 0254 template<typename Char, typename... Args> 0255 struct ViewForImpl<std::basic_string_view<Char, Args...>> : ViewForImpl<Char *> { 0256 }; 0257 #endif 0258 0259 // This metafunction maps a StringLike to a View (currently, QChar, 0260 // QStringView, QLatin1String). This is what QStringTokenizerBase 0261 // operates on. QStringTokenizer adds pinning to keep rvalues alive 0262 // for the duration of the algorithm. 0263 template<typename String> 0264 using ViewFor = typename ViewForImpl<typename std::decay<String>::type>::type; 0265 0266 // Pinning: 0267 // rvalues of owning string types need to be moved into QStringTokenizer 0268 // to keep them alive for the lifetime of the tokenizer. For lvalues, we 0269 // assume the user takes care of that. 
0270 0271 // default: don't pin anything (characters are pinned implicitly) 0272 template<typename String> 0273 struct PinForImpl { 0274 using type = ViewFor<String>; 0275 }; 0276 0277 // rvalue QString -> QString 0278 template<> 0279 struct PinForImpl<QString> { 0280 using type = QString; 0281 }; 0282 0283 // rvalue std::basic_string -> basic_string 0284 template<typename Char, typename... Args> 0285 struct PinForImpl<std::basic_string<Char, Args...>> { 0286 using type = std::basic_string<Char, Args...>; 0287 }; 0288 0289 // rvalue QStringBuilder -> pin as the nested ConvertTo type 0290 template<typename LHS, typename RHS> 0291 struct PinForImpl<QStringBuilder<LHS, RHS>> : PinForImpl<typename QStringBuilder<LHS, RHS>::ConvertTo> { 0292 }; 0293 0294 template<typename StringLike> 0295 using PinFor = typename PinForImpl<typename std::remove_cv<StringLike>::type>::type; 0296 0297 template<typename T> 0298 struct is_owning_string_type : std::false_type { 0299 }; 0300 template<> 0301 struct is_owning_string_type<QString> : std::true_type { 0302 }; 0303 template<typename... Args> 0304 struct is_owning_string_type<std::basic_string<Args...>> : std::true_type { 0305 }; 0306 0307 // unpinned 0308 template<typename T, bool pinned = is_owning_string_type<T>::value> 0309 struct Pinning { 0310 // this is the storage for non-pinned types - no storage 0311 constexpr Pinning(const T &) noexcept 0312 { 0313 } 0314 // Since we don't store something, the view() method needs to be 0315 // given something it can return. 0316 constexpr T view(T t) const noexcept 0317 { 0318 return t; 0319 } 0320 }; 0321 0322 // pinned 0323 template<typename T> 0324 struct Pinning<T, true> { 0325 T m_string; 0326 // specialisation for owning string types (QString, std::u16string): 0327 // stores the string: 0328 constexpr Pinning(T &&s) noexcept 0329 : m_string{std::move(s)} 0330 { 0331 } 0332 // ... 
and thus view() uses that instead of the argument passed in: 0333 constexpr QStringView view(const T &) const noexcept 0334 { 0335 return m_string; 0336 } 0337 }; 0338 0339 // NeedlePinning and HaystackPinning are there to distinguish them as 0340 // base classes of QStringTokenizer. We use inheritance to reap the 0341 // empty base class optimization. 0342 template<typename T> 0343 struct NeedlePinning : Pinning<T> { 0344 using Pinning<T>::Pinning; 0345 template<typename Arg> 0346 constexpr auto needleView(Arg &&a) const noexcept -> decltype(this->view(std::forward<Arg>(a))) 0347 { 0348 return this->view(std::forward<Arg>(a)); 0349 } 0350 }; 0351 0352 template<typename T> 0353 struct HaystackPinning : Pinning<T> { 0354 using Pinning<T>::Pinning; 0355 template<typename Arg> 0356 constexpr auto haystackView(Arg &&a) const noexcept -> decltype(this->view(std::forward<Arg>(a))) 0357 { 0358 return this->view(std::forward<Arg>(a)); 0359 } 0360 }; 0361 0362 // The Base of a QStringTokenizer is QStringTokenizerBase for the views 0363 // corresponding to the Haystack and Needle template arguments 0364 // 0365 // ie. 
QStringTokenizer<QString, QString> 0366 // : QStringTokenizerBase<QStringView, QStringView> (+ pinning) 0367 template<typename Haystack, typename Needle> 0368 using TokenizerBase = QStringTokenizerBase<ViewFor<Haystack>, ViewFor<Needle>>; 0369 } // namespace Tok 0370 } // namespace QtPrivate 0371 0372 template<typename Haystack, typename Needle> 0373 class QStringTokenizer : private QtPrivate::Tok::HaystackPinning<Haystack>, 0374 private QtPrivate::Tok::NeedlePinning<Needle>, 0375 public QtPrivate::Tok::TokenizerBase<Haystack, Needle> 0376 { 0377 using HPin = QtPrivate::Tok::HaystackPinning<Haystack>; 0378 using NPin = QtPrivate::Tok::NeedlePinning<Needle>; 0379 using Base = QtPrivate::Tok::TokenizerBase<Haystack, Needle>; 0380 template<typename Container, typename HPin> 0381 struct if_haystack_not_pinned_impl : std::enable_if<std::is_empty<HPin>::value, bool> { 0382 }; 0383 template<typename Container> 0384 using if_haystack_not_pinned = typename if_haystack_not_pinned_impl<Container, HPin>::type; 0385 template<typename Container, typename Iterator = decltype(std::begin(std::declval<Container>()))> 0386 using if_compatible_container = 0387 typename std::enable_if<std::is_same<typename Base::value_type, typename std::iterator_traits<Iterator>::value_type>::value, bool>::type; 0388 0389 public: 0390 using value_type = typename Base::value_type; 0391 0392 constexpr explicit QStringTokenizer(Haystack haystack, Needle needle, Qt::CaseSensitivity cs, Qt::SplitBehavior sb = Qt::KeepEmptyParts) 0393 // here, we present the haystack to Pinning<>, for optional storing. 0394 // If it did store, haystack is moved-from and mustn't be touched 0395 // any longer, which is why view() for these Pinning<>s ignores the 0396 // argument. 0397 : HPin{std::forward<Haystack>(haystack)} 0398 , NPin{std::forward<Needle>(needle)} 0399 , 0400 // If Pinning<> didn't store, we pass the haystack (ditto needle) 0401 // to view() again, so it can be copied from there. 
0402 Base{this->haystackView(haystack), this->needleView(needle), sb, cs} 0403 { 0404 } 0405 constexpr explicit QStringTokenizer(Haystack haystack, Needle needle, Qt::SplitBehavior sb = Qt::KeepEmptyParts, Qt::CaseSensitivity cs = Qt::CaseSensitive) 0406 : HPin{std::forward<Haystack>(haystack)} 0407 , NPin{std::forward<Needle>(needle)} 0408 , Base{this->haystackView(haystack), this->needleView(needle), sb, cs} 0409 { 0410 } 0411 0412 template<typename Container = QVector<value_type>, if_compatible_container<Container> = true> 0413 Container toContainer(Container &&c = {}) const & 0414 { 0415 for (auto e : *this) 0416 c.push_back(e); 0417 return std::forward<Container>(c); 0418 } 0419 0420 template<typename Container = QVector<value_type>, if_compatible_container<Container> = true, if_haystack_not_pinned<Container> = true> 0421 Container toContainer(Container &&c = {}) const && 0422 { 0423 for (auto e : *this) 0424 c.push_back(e); 0425 return std::forward<Container>(c); 0426 } 0427 }; 0428 0429 namespace QtPrivate 0430 { 0431 namespace Tok 0432 { 0433 // This meta function just calculated the template arguments for the 0434 // QStringTokenizer (not -Base), based on the actual arguments passed 0435 // to qTokenize() (or the ctor, with CTAD). It basically detects rvalue 0436 // QString and std::basic_string and otherwise decays the arguments to 0437 // the respective view type. 0438 // 0439 // #define works around a C++ restriction: [temp.deduct.guide]/3 seems 0440 // to ask for the simple-template-id following the `->` of a deduction 0441 // guide to be identical to the class name for which we guide deduction. 0442 // In particular, Clang rejects a template alias there, while GCC accepts 0443 // it. 
0444 #define Q_TOK_RESULT QStringTokenizer<QtPrivate::Tok::PinFor<Haystack>, QtPrivate::Tok::PinFor<Needle>> /*end*/ 0445 template<typename Haystack, typename Needle> 0446 using TokenizerResult = Q_TOK_RESULT; 0447 template<typename Haystack, typename Needle> 0448 using is_nothrow_constructible_from = std::is_nothrow_copy_constructible<TokenizerResult<Haystack, Needle>>; 0449 } 0450 } 0451 0452 #ifdef __cpp_deduction_guides 0453 // these tell the compiler how to determine the QStringTokenizer 0454 // template arguments based on the constructor arguments (CTAD): 0455 template<typename Haystack, typename Needle> 0456 QStringTokenizer(Haystack &&, Needle &&)->Q_TOK_RESULT; 0457 template<typename Haystack, typename Needle> 0458 QStringTokenizer(Haystack &&, Needle &&, Qt::SplitBehavior)->Q_TOK_RESULT; 0459 template<typename Haystack, typename Needle> 0460 QStringTokenizer(Haystack &&, Needle &&, Qt::SplitBehavior, Qt::CaseSensitivity)->Q_TOK_RESULT; 0461 template<typename Haystack, typename Needle> 0462 QStringTokenizer(Haystack &&, Needle &&, Qt::CaseSensitivity)->Q_TOK_RESULT; 0463 template<typename Haystack, typename Needle> 0464 QStringTokenizer(Haystack &&, Needle &&, Qt::CaseSensitivity, Qt::SplitBehavior)->Q_TOK_RESULT; 0465 #endif 0466 0467 #undef Q_TOK_RESULT 0468 0469 template<typename Haystack, typename Needle, typename... Flags> 0470 Q_REQUIRED_RESULT constexpr auto qTokenize(Haystack &&h, Needle &&n, Flags... 
flags) 0471 -> decltype(QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h), std::forward<Needle>(n), flags...}) 0472 { 0473 return QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h), std::forward<Needle>(n), flags...}; 0474 } 0475 0476 template<typename Haystack, typename Needle> 0477 auto QStringTokenizerBase<Haystack, Needle>::next(tokenizer_state state) const noexcept -> next_result 0478 { 0479 while (true) { 0480 if (state.end < 0) { 0481 // already at end: 0482 return {{}, false, state}; 0483 } 0484 state.end = m_haystack.indexOf(m_needle, state.start + state.extra, m_cs); 0485 Haystack result; 0486 if (state.end >= 0) { 0487 // token separator found => return intermediate element: 0488 result = m_haystack.mid(state.start, state.end - state.start); 0489 const auto ns = QtPrivate::Tok::size(m_needle); 0490 state.start = state.end + ns; 0491 state.extra = (ns == 0 ? 1 : 0); 0492 } else { 0493 // token separator not found => return final element: 0494 result = m_haystack.mid(state.start); 0495 } 0496 if ((m_sb & Qt::SkipEmptyParts) && result.isEmpty()) 0497 continue; 0498 return {result, true, state}; 0499 } 0500 } 0501 0502 #endif /* QSTRINGTOKENIZER_H */