File indexing completed on 2024-04-14 03:55:23

0001 /*
0002     SPDX-FileCopyrightText: 2008-2009 Michel Ludwig <michel.ludwig@kdemail.net>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #ifndef PREFIXSTORE_H
0008 #define PREFIXSTORE_H
0009 
0010 #include <QHash>
0011 #include <QList>
0012 #include <QPair>
0013 #include <QSet>
0014 #include <QString>
0015 
0016 #include "katetextline.h"
0017 
0018 /**
0019  * This class can be used to efficiently search for occurrences of strings in
0020  * a given string. Theoretically speaking, a finite deterministic automaton is
0021  * constructed which exactly accepts the strings that are to be recognized. In
0022  * order to check whether a given string contains one of the strings that are being
0023  * searched for the constructed automaton has to applied on each position in the
0024  * given string.
0025  **/
0026 class KatePrefixStore
0027 {
0028 public:
0029     typedef QPair<bool, bool> BooleanPair;
0030 
0031     virtual ~KatePrefixStore() = default;
0032 
0033     void addPrefix(const QString &prefix);
0034     void removePrefix(const QString &prefix);
0035 
0036     /**
0037      * Returns the shortest prefix of the given string that is contained in
0038      * this prefix store starting at position 'start'.
0039      **/
0040     QString findPrefix(const QString &s, int start = 0) const;
0041 
0042     /**
0043      * Returns the shortest prefix of the given string that is contained in
0044      * this prefix store starting at position 'start'.
0045      **/
0046     QString findPrefix(const Kate::TextLine &line, int start = 0) const;
0047 
0048     int longestPrefixLength() const;
0049 
0050     void clear();
0051 
0052     void dump();
0053 
0054 protected:
0055     int m_longestPrefixLength = 0;
0056     QSet<QString> m_prefixSet;
0057 
0058     // State x Char -> Nr. of char occurrences in prefixes x State
0059     typedef QHash<unsigned short, QPair<unsigned int, unsigned long long>> CharToOccurrenceStateHash;
0060     typedef QHash<unsigned long long, CharToOccurrenceStateHash> TransitionFunction;
0061     TransitionFunction m_transitionFunction;
0062     QSet<unsigned long long> m_acceptingStates;
0063     QList<unsigned long long> m_stateFreeList;
0064     unsigned long long m_lastAssignedState = 0;
0065 
0066     int computeLongestPrefixLength();
0067     unsigned long long nextFreeState();
0068     //     bool containsPrefixOfLengthEndingWith(int length, const QChar& c);
0069 };
0070 
0071 #endif