File indexing completed on 2024-05-05 05:50:42

0001 /*
0002     SPDX-FileCopyrightText: 2020-2021 Klarälvdalens Datakonsult AB a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
0003 
0004     This file is part of KDToolBox (https://github.com/KDAB/KDToolBox).
0005 
0006     SPDX-License-Identifier: MIT
0007 */
0008 
0009 #ifndef QSTRINGTOKENIZER_H
0010 #define QSTRINGTOKENIZER_H
0011 
0012 #include <qnamespace.h>
0013 
0014 QT_BEGIN_NAMESPACE
0015 
0016 template<typename, typename>
0017 class QStringBuilder;
0018 template<typename>
0019 class QList;
0020 
0021 QT_END_NAMESPACE
0022 
0023 #if defined(Q_QDOC) || (defined(__cpp_range_based_for) && __cpp_range_based_for >= 201603)
0024 #define Q_STRINGTOKENIZER_USE_SENTINEL
0025 #endif
0026 
0027 class QStringTokenizerBaseBase
0028 {
0029 protected:
0030     ~QStringTokenizerBaseBase() = default;
0031     Q_DECL_CONSTEXPR QStringTokenizerBaseBase(Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept
0032         : m_sb{sb}
0033         , m_cs{cs}
0034     {
0035     }
0036 
0037     struct tokenizer_state {
0038         qsizetype start, end, extra;
0039         friend constexpr bool operator==(tokenizer_state lhs, tokenizer_state rhs) noexcept
0040         {
0041             return lhs.start == rhs.start && lhs.end == rhs.end && lhs.extra == rhs.extra;
0042         }
0043         friend constexpr bool operator!=(tokenizer_state lhs, tokenizer_state rhs) noexcept
0044         {
0045             return !operator==(lhs, rhs);
0046         }
0047     };
0048 
0049     Qt::SplitBehavior m_sb;
0050     Qt::CaseSensitivity m_cs;
0051 };
0052 
0053 template<typename Haystack, typename Needle>
0054 class QStringTokenizerBase : protected QStringTokenizerBaseBase
0055 {
0056     struct next_result {
0057         Haystack value;
0058         bool ok;
0059         tokenizer_state state;
0060     };
0061     inline next_result next(tokenizer_state state) const noexcept;
0062     inline next_result toFront() const noexcept
0063     {
0064         return next({});
0065     }
0066 
0067 public:
0068     constexpr explicit QStringTokenizerBase(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept
0069         : QStringTokenizerBaseBase{sb, cs}
0070         , m_haystack{haystack}
0071         , m_needle{needle}
0072     {
0073     }
0074 
0075     class iterator;
0076     friend class iterator;
0077 #ifdef Q_STRINGTOKENIZER_USE_SENTINEL
0078     class sentinel
0079     {
0080         friend constexpr bool operator==(sentinel, sentinel) noexcept
0081         {
0082             return true;
0083         }
0084         friend constexpr bool operator!=(sentinel, sentinel) noexcept
0085         {
0086             return false;
0087         }
0088     };
0089 #else
0090     using sentinel = iterator;
0091 #endif
0092     class iterator
0093     {
0094         const QStringTokenizerBase *tokenizer;
0095         next_result current;
0096         friend class QStringTokenizerBase;
0097         explicit iterator(const QStringTokenizerBase &t) noexcept
0098             : tokenizer{&t}
0099             , current{t.toFront()}
0100         {
0101         }
0102 
0103     public:
0104         using difference_type = qsizetype;
0105         using value_type = Haystack;
0106         using pointer = const value_type *;
0107         using reference = const value_type &;
0108         using iterator_category = std::forward_iterator_tag;
0109 
0110         iterator() noexcept = default;
0111 
0112         // violates std::forward_iterator (returns a reference into the iterator)
0113         Q_REQUIRED_RESULT constexpr const Haystack *operator->() const
0114         {
0115             return Q_ASSERT(current.ok), &current.value;
0116         }
0117         Q_REQUIRED_RESULT constexpr const Haystack &operator*() const
0118         {
0119             return *operator->();
0120         }
0121 
0122         iterator &operator++()
0123         {
0124             advance();
0125             return *this;
0126         }
0127         iterator operator++(int)
0128         {
0129             auto tmp = *this;
0130             advance();
0131             return tmp;
0132         }
0133 
0134         friend constexpr bool operator==(const iterator &lhs, const iterator &rhs) noexcept
0135         {
0136             return lhs.current.ok == rhs.current.ok && (!lhs.current.ok || (Q_ASSERT(lhs.tokenizer == rhs.tokenizer), lhs.current.state == rhs.current.state));
0137         }
0138         friend constexpr bool operator!=(const iterator &lhs, const iterator &rhs) noexcept
0139         {
0140             return !operator==(lhs, rhs);
0141         }
0142 #ifdef Q_STRINGTOKENIZER_USE_SENTINEL
0143         friend constexpr bool operator==(const iterator &lhs, sentinel) noexcept
0144         {
0145             return !lhs.current.ok;
0146         }
0147         friend constexpr bool operator!=(const iterator &lhs, sentinel) noexcept
0148         {
0149             return !operator==(lhs, sentinel{});
0150         }
0151         friend constexpr bool operator==(sentinel, const iterator &rhs) noexcept
0152         {
0153             return !rhs.current.ok;
0154         }
0155         friend constexpr bool operator!=(sentinel, const iterator &rhs) noexcept
0156         {
0157             return !operator==(sentinel{}, rhs);
0158         }
0159 #endif
0160     private:
0161         void advance()
0162         {
0163             Q_ASSERT(current.ok);
0164             current = tokenizer->next(current.state);
0165         }
0166     };
0167     using const_iterator = iterator;
0168 
0169     using size_type = std::size_t;
0170     using difference_type = typename iterator::difference_type;
0171     using value_type = typename iterator::value_type;
0172     using pointer = typename iterator::pointer;
0173     using const_pointer = pointer;
0174     using reference = typename iterator::reference;
0175     using const_reference = reference;
0176 
0177     Q_REQUIRED_RESULT iterator begin() const noexcept
0178     {
0179         return iterator{*this};
0180     }
0181     Q_REQUIRED_RESULT iterator cbegin() const noexcept
0182     {
0183         return begin();
0184     }
0185     template<bool = std::is_same<iterator, sentinel>::value> // ODR protection
0186     Q_REQUIRED_RESULT constexpr sentinel end() const noexcept
0187     {
0188         return {};
0189     }
0190     template<bool = std::is_same<iterator, sentinel>::value> // ODR protection
0191     Q_REQUIRED_RESULT constexpr sentinel cend() const noexcept
0192     {
0193         return {};
0194     }
0195 
0196 private:
0197     Haystack m_haystack;
0198     Needle m_needle;
0199 };
0200 
0201 #include <qstringview.h>
0202 
0203 namespace QtPrivate
0204 {
0205 namespace Tok
0206 {
0207 Q_DECL_CONSTEXPR qsizetype size(QChar) noexcept
0208 {
0209     return 1;
0210 }
0211 template<typename String>
0212 constexpr qsizetype size(const String &s) noexcept
0213 {
0214     return static_cast<qsizetype>(s.size());
0215 }
0216 
0217 template<typename String>
0218 struct ViewForImpl {
0219 };
0220 template<>
0221 struct ViewForImpl<QStringView> {
0222     using type = QStringView;
0223 };
0224 template<>
0225 struct ViewForImpl<QLatin1String> {
0226     using type = QLatin1String;
0227 };
0228 template<>
0229 struct ViewForImpl<QChar> {
0230     using type = QChar;
0231 };
0232 template<>
0233 struct ViewForImpl<QString> : ViewForImpl<QStringView> {
0234 };
0235 template<>
0236 struct ViewForImpl<QLatin1Char> : ViewForImpl<QChar> {
0237 };
0238 template<>
0239 struct ViewForImpl<char16_t> : ViewForImpl<QChar> {
0240 };
0241 template<>
0242 struct ViewForImpl<char16_t *> : ViewForImpl<QStringView> {
0243 };
0244 template<>
0245 struct ViewForImpl<const char16_t *> : ViewForImpl<QStringView> {
0246 };
0247 template<typename LHS, typename RHS>
0248 struct ViewForImpl<QStringBuilder<LHS, RHS>> : ViewForImpl<typename QStringBuilder<LHS, RHS>::ConvertTo> {
0249 };
0250 template<typename Char, typename... Args>
0251 struct ViewForImpl<std::basic_string<Char, Args...>> : ViewForImpl<Char *> {
0252 };
0253 #ifdef __cpp_lib_string_view
0254 template<typename Char, typename... Args>
0255 struct ViewForImpl<std::basic_string_view<Char, Args...>> : ViewForImpl<Char *> {
0256 };
0257 #endif
0258 
0259 // This metafunction maps a StringLike to a View (currently, QChar,
0260 // QStringView, QLatin1String). This is what QStringTokenizerBase
0261 // operates on. QStringTokenizer adds pinning to keep rvalues alive
0262 // for the duration of the algorithm.
0263 template<typename String>
0264 using ViewFor = typename ViewForImpl<typename std::decay<String>::type>::type;
0265 
0266 // Pinning:
0267 // rvalues of owning string types need to be moved into QStringTokenizer
0268 // to keep them alive for the lifetime of the tokenizer. For lvalues, we
0269 // assume the user takes care of that.
0270 
0271 // default: don't pin anything (characters are pinned implicitly)
0272 template<typename String>
0273 struct PinForImpl {
0274     using type = ViewFor<String>;
0275 };
0276 
0277 // rvalue QString -> QString
0278 template<>
0279 struct PinForImpl<QString> {
0280     using type = QString;
0281 };
0282 
0283 // rvalue std::basic_string -> basic_string
0284 template<typename Char, typename... Args>
0285 struct PinForImpl<std::basic_string<Char, Args...>> {
0286     using type = std::basic_string<Char, Args...>;
0287 };
0288 
0289 // rvalue QStringBuilder -> pin as the nested ConvertTo type
0290 template<typename LHS, typename RHS>
0291 struct PinForImpl<QStringBuilder<LHS, RHS>> : PinForImpl<typename QStringBuilder<LHS, RHS>::ConvertTo> {
0292 };
0293 
0294 template<typename StringLike>
0295 using PinFor = typename PinForImpl<typename std::remove_cv<StringLike>::type>::type;
0296 
0297 template<typename T>
0298 struct is_owning_string_type : std::false_type {
0299 };
0300 template<>
0301 struct is_owning_string_type<QString> : std::true_type {
0302 };
0303 template<typename... Args>
0304 struct is_owning_string_type<std::basic_string<Args...>> : std::true_type {
0305 };
0306 
0307 // unpinned
0308 template<typename T, bool pinned = is_owning_string_type<T>::value>
0309 struct Pinning {
0310     // this is the storage for non-pinned types - no storage
0311     constexpr Pinning(const T &) noexcept
0312     {
0313     }
0314     // Since we don't store something, the view() method needs to be
0315     // given something it can return.
0316     constexpr T view(T t) const noexcept
0317     {
0318         return t;
0319     }
0320 };
0321 
0322 // pinned
0323 template<typename T>
0324 struct Pinning<T, true> {
0325     T m_string;
0326     // specialisation for owning string types (QString, std::u16string):
0327     // stores the string:
0328     constexpr Pinning(T &&s) noexcept
0329         : m_string{std::move(s)}
0330     {
0331     }
0332     // ... and thus view() uses that instead of the argument passed in:
0333     constexpr QStringView view(const T &) const noexcept
0334     {
0335         return m_string;
0336     }
0337 };
0338 
0339 // NeedlePinning and HaystackPinning are there to distinguish them as
0340 // base classes of QStringTokenizer. We use inheritance to reap the
0341 // empty base class optimization.
0342 template<typename T>
0343 struct NeedlePinning : Pinning<T> {
0344     using Pinning<T>::Pinning;
0345     template<typename Arg>
0346     constexpr auto needleView(Arg &&a) const noexcept -> decltype(this->view(std::forward<Arg>(a)))
0347     {
0348         return this->view(std::forward<Arg>(a));
0349     }
0350 };
0351 
0352 template<typename T>
0353 struct HaystackPinning : Pinning<T> {
0354     using Pinning<T>::Pinning;
0355     template<typename Arg>
0356     constexpr auto haystackView(Arg &&a) const noexcept -> decltype(this->view(std::forward<Arg>(a)))
0357     {
0358         return this->view(std::forward<Arg>(a));
0359     }
0360 };
0361 
0362 // The Base of a QStringTokenizer is QStringTokenizerBase for the views
0363 // corresponding to the Haystack and Needle template arguments
0364 //
0365 // ie. QStringTokenizer<QString, QString>
0366 //       : QStringTokenizerBase<QStringView, QStringView> (+ pinning)
0367 template<typename Haystack, typename Needle>
0368 using TokenizerBase = QStringTokenizerBase<ViewFor<Haystack>, ViewFor<Needle>>;
0369 } // namespace Tok
0370 } // namespace QtPrivate
0371 
0372 template<typename Haystack, typename Needle>
0373 class QStringTokenizer : private QtPrivate::Tok::HaystackPinning<Haystack>,
0374                          private QtPrivate::Tok::NeedlePinning<Needle>,
0375                          public QtPrivate::Tok::TokenizerBase<Haystack, Needle>
0376 {
0377     using HPin = QtPrivate::Tok::HaystackPinning<Haystack>;
0378     using NPin = QtPrivate::Tok::NeedlePinning<Needle>;
0379     using Base = QtPrivate::Tok::TokenizerBase<Haystack, Needle>;
0380     template<typename Container, typename HPin>
0381     struct if_haystack_not_pinned_impl : std::enable_if<std::is_empty<HPin>::value, bool> {
0382     };
0383     template<typename Container>
0384     using if_haystack_not_pinned = typename if_haystack_not_pinned_impl<Container, HPin>::type;
0385     template<typename Container, typename Iterator = decltype(std::begin(std::declval<Container>()))>
0386     using if_compatible_container =
0387         typename std::enable_if<std::is_same<typename Base::value_type, typename std::iterator_traits<Iterator>::value_type>::value, bool>::type;
0388 
0389 public:
0390     using value_type = typename Base::value_type;
0391 
0392     constexpr explicit QStringTokenizer(Haystack haystack, Needle needle, Qt::CaseSensitivity cs, Qt::SplitBehavior sb = Qt::KeepEmptyParts)
0393         // here, we present the haystack to Pinning<>, for optional storing.
0394         // If it did store, haystack is moved-from and mustn't be touched
0395         // any longer, which is why view() for these Pinning<>s ignores the
0396         // argument.
0397         : HPin{std::forward<Haystack>(haystack)}
0398         , NPin{std::forward<Needle>(needle)}
0399         ,
0400         // If Pinning<> didn't store, we pass the haystack (ditto needle)
0401         // to view() again, so it can be copied from there.
0402         Base{this->haystackView(haystack), this->needleView(needle), sb, cs}
0403     {
0404     }
0405     constexpr explicit QStringTokenizer(Haystack haystack, Needle needle, Qt::SplitBehavior sb = Qt::KeepEmptyParts, Qt::CaseSensitivity cs = Qt::CaseSensitive)
0406         : HPin{std::forward<Haystack>(haystack)}
0407         , NPin{std::forward<Needle>(needle)}
0408         , Base{this->haystackView(haystack), this->needleView(needle), sb, cs}
0409     {
0410     }
0411 
0412     template<typename Container = QVector<value_type>, if_compatible_container<Container> = true>
0413     Container toContainer(Container &&c = {}) const &
0414     {
0415         for (auto e : *this)
0416             c.push_back(e);
0417         return std::forward<Container>(c);
0418     }
0419 
0420     template<typename Container = QVector<value_type>, if_compatible_container<Container> = true, if_haystack_not_pinned<Container> = true>
0421     Container toContainer(Container &&c = {}) const &&
0422     {
0423         for (auto e : *this)
0424             c.push_back(e);
0425         return std::forward<Container>(c);
0426     }
0427 };
0428 
0429 namespace QtPrivate
0430 {
0431 namespace Tok
0432 {
0433 // This meta function just calculated the template arguments for the
0434 // QStringTokenizer (not -Base), based on the actual arguments passed
0435 // to qTokenize() (or the ctor, with CTAD). It basically detects rvalue
0436 // QString and std::basic_string and otherwise decays the arguments to
0437 // the respective view type.
0438 //
0439 // #define works around a C++ restriction: [temp.deduct.guide]/3 seems
0440 // to ask for the simple-template-id following the `->` of a deduction
0441 // guide to be identical to the class name for which we guide deduction.
0442 // In particular, Clang rejects a template alias there, while GCC accepts
0443 // it.
0444 #define Q_TOK_RESULT QStringTokenizer<QtPrivate::Tok::PinFor<Haystack>, QtPrivate::Tok::PinFor<Needle>> /*end*/
0445 template<typename Haystack, typename Needle>
0446 using TokenizerResult = Q_TOK_RESULT;
0447 template<typename Haystack, typename Needle>
0448 using is_nothrow_constructible_from = std::is_nothrow_copy_constructible<TokenizerResult<Haystack, Needle>>;
0449 }
0450 }
0451 
0452 #ifdef __cpp_deduction_guides
0453 // these tell the compiler how to determine the QStringTokenizer
0454 // template arguments based on the constructor arguments (CTAD):
0455 template<typename Haystack, typename Needle>
0456 QStringTokenizer(Haystack &&, Needle &&)->Q_TOK_RESULT;
0457 template<typename Haystack, typename Needle>
0458 QStringTokenizer(Haystack &&, Needle &&, Qt::SplitBehavior)->Q_TOK_RESULT;
0459 template<typename Haystack, typename Needle>
0460 QStringTokenizer(Haystack &&, Needle &&, Qt::SplitBehavior, Qt::CaseSensitivity)->Q_TOK_RESULT;
0461 template<typename Haystack, typename Needle>
0462 QStringTokenizer(Haystack &&, Needle &&, Qt::CaseSensitivity)->Q_TOK_RESULT;
0463 template<typename Haystack, typename Needle>
0464 QStringTokenizer(Haystack &&, Needle &&, Qt::CaseSensitivity, Qt::SplitBehavior)->Q_TOK_RESULT;
0465 #endif
0466 
0467 #undef Q_TOK_RESULT
0468 
0469 template<typename Haystack, typename Needle, typename... Flags>
0470 Q_REQUIRED_RESULT constexpr auto qTokenize(Haystack &&h, Needle &&n, Flags... flags)
0471     -> decltype(QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h), std::forward<Needle>(n), flags...})
0472 {
0473     return QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h), std::forward<Needle>(n), flags...};
0474 }
0475 
0476 template<typename Haystack, typename Needle>
0477 auto QStringTokenizerBase<Haystack, Needle>::next(tokenizer_state state) const noexcept -> next_result
0478 {
0479     while (true) {
0480         if (state.end < 0) {
0481             // already at end:
0482             return {{}, false, state};
0483         }
0484         state.end = m_haystack.indexOf(m_needle, state.start + state.extra, m_cs);
0485         Haystack result;
0486         if (state.end >= 0) {
0487             // token separator found => return intermediate element:
0488             result = m_haystack.mid(state.start, state.end - state.start);
0489             const auto ns = QtPrivate::Tok::size(m_needle);
0490             state.start = state.end + ns;
0491             state.extra = (ns == 0 ? 1 : 0);
0492         } else {
0493             // token separator not found => return final element:
0494             result = m_haystack.mid(state.start);
0495         }
0496         if ((m_sb & Qt::SkipEmptyParts) && result.isEmpty())
0497             continue;
0498         return {result, true, state};
0499     }
0500 }
0501 
0502 #endif /* QSTRINGTOKENIZER_H */