File indexing completed on 2024-05-05 04:38:09

0001 /*
0002     SPDX-FileCopyrightText: 2008 David Nolden <david.nolden.kdevelop@art-master.de>
0003 
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 */
0006 
0007 #ifndef KDEVPLATFORM_INDEXED_STRING_H
0008 #define KDEVPLATFORM_INDEXED_STRING_H
0009 
0010 //krazy:excludeall=dpointer,inline
0011 
0012 #include <QMetaType>
0013 #include <QStringView>
0014 #include <QUrl>
0015 
0016 #include "referencecounting.h"
0017 
0018 #include "serializationexport.h"
0019 
0020 namespace KDevelop {
0021 /**
0022  * This string does "disk reference-counting", which means that reference-counts are maintained,
0023  * but only when the string is in a disk-stored location. The file referencecounting.h is used
0024  * to manage this condition.
0025  *
0026  * Whenever reference-counting is enabled for a range that contains the IndexedString, it will
0027  * manipulate the reference-counts.
0028  *
0029  * The duchain storage mechanisms automatically take care of correctly managing that condition,
0030  * so you don't need to care, and can just use this class in every duchain data type without
0031  * restrictions.
0032  *
0033  * @warning Do not use IndexedString after QCoreApplication::aboutToQuit() has been emitted,
0034  * items that are not disk-referenced will be invalid at that point.
0035  *
0036  * @note Empty strings have an index of zero.
0037  *
0038  * @note Strings of length one are not put into the repository, but are encoded directly within
0039  * the index: They are encoded like @c 0xffff00bb where @c bb is the byte of the character.
0040  *
0041  * @note Move constructor and move assignment operator are deliberately not implemented for
0042  * IndexedString. It is tricky to implement move operations that are both correct and, in
0043  * practice, more efficient than the copy operations, seeing that more than 99% of arguments of
0044  * the copy/move operations are not disk-reference-counted. Moreover, according to test runs at
0045  * the time of this writing, the copied- or moved-from IndexedString is never
0046  * disk-reference-counted in practice, so the moved-from string's reference count cannot be
0047  * stolen. IndexedString's copy constructor and copy assignment operator are noexcept to allow
0048  * noexcept move operations in classes that contain IndexedString as a data member.
0049  */
0050 class KDEVPLATFORMSERIALIZATION_EXPORT IndexedString
0051 {
0052 public:
0053     IndexedString() = default; ///< Constructs an empty string (index zero).
0054     /**
0055      * @param str must be a utf8 encoded string, does not need to be 0-terminated.
0056      * @param length must be its length in bytes.
0057      * @param hash must be a hash as constructed with the here defined hash functions.
0058      *             If it is zero, it will be computed.
0059      */
0060     explicit IndexedString(const char* str, unsigned short length, unsigned int hash = 0);
0061 
0062     /**
0063      * Needs a zero terminated string. When the information is already available,
0064      * try using the other constructor.
0065      *
0066      * @warning There is a UTF8-related issue when attempting to retrieve the string
0067      * using str() from an IndexedString built from this constructor.
0068      */
0069     explicit IndexedString(const char* str);
0070 
0071     explicit IndexedString(char c); ///< Constructs a string consisting of the single character @p c.
0072 
0073     explicit IndexedString(bool) = delete; ///< Deleted; presumably keeps a bool from converting and selecting another overload.
0074 
0075     /**
0076      * When the information is already available, try using the other constructor.
0077      *
0078      * @note This is expensive.
0079      */
0080     explicit IndexedString(QStringView str);
0081     explicit IndexedString(const QString& str)
0082         : IndexedString(QStringView{str}) // delegates to the QStringView constructor
0083     {
0084     }
0085 
0086     /**
0087      * When the information is already available, try using the other constructor.
0088      *
0089      * @note This is expensive.
0090      */
0091     explicit IndexedString(const QByteArray& str);
0092 
0093     /**
0094      * Returns a not reference-counted IndexedString that represents the given index.
0095      *
0096      * @warning It is dangerous dealing with indices directly, because it may break
0097      *          the reference counting logic. Never store pure indices to disk.
0098      */
0099     static IndexedString fromIndex(unsigned int index)
0100     {
0101         IndexedString ret;
0102         ret.m_index = index; // set the member directly; no constructor taking an index is involved
0103         return ret;
0104     }
0105 
0106     /**
0107      * @warning This is relatively expensive: needs a mutex lock, hash lookups, and eventual loading,
0108      *       so avoid it when possible.
0109      */
0110     static int lengthFromIndex(unsigned int index);
0111 
0112     IndexedString(const IndexedString&) noexcept; ///< noexcept so that classes containing an IndexedString can have noexcept move operations.
0113 
0114     ~IndexedString();
0115 
0116     /**
0117      * Creates an indexed string from a QUrl, this is expensive.
0118      */
0119     explicit IndexedString(const QUrl& url);
0120 
0121     /**
0122      * Re-construct a QUrl from this indexed string, the result can be used with the
0123      * QUrl-using constructor.
0124      *
0125      * @note This is expensive.
0126      */
0127     QUrl toUrl() const;
0128 
0129     inline unsigned int hash() const
0130     {
0131         return m_index; // the index itself serves as the hash value
0132     }
0133 
0134     /**
0135      * The string is uniquely identified by this index. You can use it for comparison.
0136      *
0137      * @warning It is dangerous dealing with indices directly, because it may break the
0138      *          reference counting logic. Never store pure indices to disk.
0139      */
0140     inline unsigned int index() const
0141     {
0142         return m_index;
0143     }
0144 
0145     bool isEmpty() const
0146     {
0147         return m_index == 0; // empty strings always have an index of zero
0148     }
0149 
0150     /**
0151      * @note This is relatively expensive: needs a mutex lock, hash lookups, and eventual loading,
0152      * so avoid it when possible.
0153      */
0154     int length() const;
0155 
0156     /**
0157      * Returns the underlying c string, in utf-8 encoding.
0158      *
0159      * @warning The string is not 0-terminated, consider length()!
0160      */
0161     const char* c_str() const;
0162 
0163     /**
0164      * Convenience function, avoid using it, it's relatively expensive
0165      */
0166     QString str() const;
0167 
0168     /**
0169      * Convenience function, avoid using it, it's relatively expensive (less expensive than str() though)
0170      */
0171     QByteArray byteArray() const;
0172 
0173     IndexedString& operator=(const IndexedString&) noexcept;
0174 
0175     friend KDEVPLATFORMSERIALIZATION_EXPORT void swap(IndexedString&, IndexedString&) noexcept;
0176 
0177     /**
0178      * Fast index-based comparison
0179      */
0180     bool operator ==(const IndexedString& rhs) const
0181     {
0182         return m_index == rhs.m_index;
0183     }
0184 
0185     /**
0186      * Fast index-based comparison
0187      */
0188     bool operator !=(const IndexedString& rhs) const
0189     {
0190         return m_index != rhs.m_index;
0191     }
0192 
0193     /**
0194      * Does not compare alphabetically, uses the index for ordering.
0195      */
0196     bool operator <(const IndexedString& rhs) const
0197     {
0198         return m_index < rhs.m_index;
0199     }
0200 
0201     /**
0202      * Use this to construct a hash-value on-the-fly
0203      *
0204      * To read it, just use the hash member, and when a new string is started, call @c clear().
0205      *
0206      * This needs very fast performance(per character operation), so it must stay inlined.
0207      */
0208     struct RunningHash
0209     {
0210         enum {
0211             HashInitialValue = 5381
0212         };
0213 
0214         RunningHash()
0215         {
0216         }
0217         inline void append(const char c)
0218         {
0219             hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
0220         }
0221         inline void clear()
0222         {
0223             hash = HashInitialValue;
0224         }
0225 
0226         /// Current hash value; starts at HashInitialValue (not zero) and is reset to it by clear().
0227         unsigned int hash = HashInitialValue;
0228     };
0229 
0230     static unsigned int hashString(const char* str, unsigned short length);
0231 
0232     /**
0233      * Optimized function that only computes the index of a string
0234      * removes the overhead of the IndexedString ref counting
0235      */
0236     static uint indexForString(const char* str, unsigned short length, uint hash = 0);
0237     static uint indexForString(const QString& str, uint hash = 0);
0238 
0239 private:
0240     uint m_index = 0; ///< Index identifying the string; zero means the empty string.
0241 };
0242 
0243 // qHash() overload for IndexedString. It would need to be exported if the inline keyword were removed.
0244 inline uint qHash(const KDevelop::IndexedString& str)
0245 {
0246     return str.index(); // the string's unique index doubles as its hash value
0247 }
0248 }
0249 
0250 /**
0251  * qDebug() stream operator. Writes @p string to the debug output stream @p s.
0252  */
0253 KDEVPLATFORMSERIALIZATION_EXPORT QDebug operator<<(QDebug s, const KDevelop::IndexedString& string);
0254 
0255 Q_DECLARE_METATYPE(KDevelop::IndexedString) // makes IndexedString known to QMetaType (e.g. for storage in a QVariant)
0256 Q_DECLARE_TYPEINFO(KDevelop::IndexedString, Q_MOVABLE_TYPE); // tells Qt containers that instances may be relocated in memory
0257 
0258 #endif