// File indexing completed on 2024-05-05 04:38:09
0001 /* 0002 SPDX-FileCopyrightText: 2008 David Nolden <david.nolden.kdevelop@art-master.de> 0003 0004 SPDX-License-Identifier: GPL-2.0-or-later 0005 */ 0006 0007 #ifndef KDEVPLATFORM_INDEXED_STRING_H 0008 #define KDEVPLATFORM_INDEXED_STRING_H 0009 0010 //krazy:excludeall=dpointer,inline 0011 0012 #include <QMetaType> 0013 #include <QStringView> 0014 #include <QUrl> 0015 0016 #include "referencecounting.h" 0017 0018 #include "serializationexport.h" 0019 0020 namespace KDevelop { 0021 /** 0022 * This string does "disk reference-counting", which means that reference-counts are maintainted, 0023 * but only when the string is in a disk-stored location. The file referencecounting.h is used 0024 * to manage this condition. 0025 * 0026 * Whenever reference-counting is enabled for a range that contains the IndexedString, it will 0027 * manipulate the reference-counts. 0028 * 0029 * The duchain storage mechanisms automatically are about correctly managing that condition, 0030 * so you don't need to care, and can just use this class in every duchain data type without 0031 * restrictions. 0032 * 0033 * @warning Do not use IndexedString after QCoreApplication::aboutToQuit() has been emitted, 0034 * items that are not disk-referenced will be invalid at that point. 0035 * 0036 * @note Empty strings have an index of zero. 0037 * 0038 * @note Strings of length one are not put into the repository, but are encoded directly within 0039 * the index: They are encoded like @c 0xffff00bb where @c bb is the byte of the character. 0040 * 0041 * @note Move constructor and move assignment operator are deliberately not implemented for 0042 * IndexedString. The move operations are tricky to implement correctly and more efficiently 0043 * in practice than the copy operations, seeing that more than 99% of arguments of the copy/move 0044 * operations are not disk-reference-counted. 
Moreover, according to test runs at the time of 0045 * this writing, the copied- or moved-from IndexedString is never disk-reference-counted in 0046 * practice, so the moved-from string's reference count cannot be stolen. IndexedString's copy 0047 * constructor and copy assignment operator are noexcept to allow noexcept move operations in 0048 * classes that contain IndexedString as a data member. 0049 */ 0050 class KDEVPLATFORMSERIALIZATION_EXPORT IndexedString 0051 { 0052 public: 0053 IndexedString() = default; 0054 /** 0055 * @param str must be a utf8 encoded string, does not need to be 0-terminated. 0056 * @param length must be its length in bytes. 0057 * @param hash must be a hash as constructed with the here defined hash functions. 0058 * If it is zero, it will be computed. 0059 */ 0060 explicit IndexedString(const char* str, unsigned short length, unsigned int hash = 0); 0061 0062 /** 0063 * Needs a zero terminated string. When the information is already available, 0064 * try using the other constructor. 0065 * 0066 * WARNING There is a UTF8-related issue when attempting to retrieve the string 0067 * using str from an IndexedString built from this constructor 0068 */ 0069 explicit IndexedString(const char* str); 0070 0071 explicit IndexedString(char c); 0072 0073 explicit IndexedString(bool) = delete; 0074 0075 /** 0076 * When the information is already available, try using the other constructor. 0077 * 0078 * @note This is expensive. 0079 */ 0080 explicit IndexedString(QStringView str); 0081 explicit IndexedString(const QString& str) 0082 : IndexedString(QStringView{str}) 0083 { 0084 } 0085 0086 /** 0087 * When the information is already available, try using the other constructor. 0088 * 0089 * @note This is expensive. 0090 */ 0091 explicit IndexedString(const QByteArray& str); 0092 0093 /** 0094 * Returns a not reference-counted IndexedString that represents the given index. 
0095 * 0096 * @warning It is dangerous dealing with indices directly, because it may break 0097 * the reference counting logic. Never store pure indices to disk. 0098 */ 0099 static IndexedString fromIndex(unsigned int index) 0100 { 0101 IndexedString ret; 0102 ret.m_index = index; 0103 return ret; 0104 } 0105 0106 /** 0107 * @warning This is relatively expensive: needs a mutex lock, hash lookups, and eventual loading, 0108 * so avoid it when possible. 0109 */ 0110 static int lengthFromIndex(unsigned int index); 0111 0112 IndexedString(const IndexedString&) noexcept; 0113 0114 ~IndexedString(); 0115 0116 /** 0117 * Creates an indexed string from a QUrl, this is expensive. 0118 */ 0119 explicit IndexedString(const QUrl& url); 0120 0121 /** 0122 * Re-construct a QUrl from this indexed string, the result can be used with the 0123 * QUrl-using constructor. 0124 * 0125 * @note This is expensive. 0126 */ 0127 QUrl toUrl() const; 0128 0129 inline unsigned int hash() const 0130 { 0131 return m_index; 0132 } 0133 0134 /** 0135 * The string is uniquely identified by this index. You can use it for comparison. 0136 * 0137 * @warning It is dangerous dealing with indices directly, because it may break the 0138 * reference counting logic. never store pure indices to disk 0139 */ 0140 inline unsigned int index() const 0141 { 0142 return m_index; 0143 } 0144 0145 bool isEmpty() const 0146 { 0147 return m_index == 0; 0148 } 0149 0150 /** 0151 * @note This is relatively expensive: needs a mutex lock, hash lookups, and eventual loading, 0152 * so avoid it when possible. 0153 */ 0154 int length() const; 0155 0156 /** 0157 * Returns the underlying c string, in utf-8 encoding. 0158 * 0159 * @warning The string is not 0-terminated, consider length()! 
0160 */ 0161 const char* c_str() const; 0162 0163 /** 0164 * Convenience function, avoid using it, it's relatively expensive 0165 */ 0166 QString str() const; 0167 0168 /** 0169 * Convenience function, avoid using it, it's relatively expensive (less expensive then str() though) 0170 */ 0171 QByteArray byteArray() const; 0172 0173 IndexedString& operator=(const IndexedString&) noexcept; 0174 0175 friend KDEVPLATFORMSERIALIZATION_EXPORT void swap(IndexedString&, IndexedString&) noexcept; 0176 0177 /** 0178 * Fast index-based comparison 0179 */ 0180 bool operator ==(const IndexedString& rhs) const 0181 { 0182 return m_index == rhs.m_index; 0183 } 0184 0185 /** 0186 * Fast index-based comparison 0187 */ 0188 bool operator !=(const IndexedString& rhs) const 0189 { 0190 return m_index != rhs.m_index; 0191 } 0192 0193 /** 0194 * Does not compare alphabetically, uses the index for ordering. 0195 */ 0196 bool operator <(const IndexedString& rhs) const 0197 { 0198 return m_index < rhs.m_index; 0199 } 0200 0201 /** 0202 * Use this to construct a hash-value on-the-fly 0203 * 0204 * To read it, just use the hash member, and when a new string is started, call @c clear(). 0205 * 0206 * This needs very fast performance(per character operation), so it must stay inlined. 
0207 */ 0208 struct RunningHash 0209 { 0210 enum { 0211 HashInitialValue = 5381 0212 }; 0213 0214 RunningHash() 0215 { 0216 } 0217 inline void append(const char c) 0218 { 0219 hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ 0220 } 0221 inline void clear() 0222 { 0223 hash = HashInitialValue; 0224 } 0225 0226 /// We initialize the hash with zero, because we want empty strings to create a zero hash(invalid) 0227 unsigned int hash = HashInitialValue; 0228 }; 0229 0230 static unsigned int hashString(const char* str, unsigned short length); 0231 0232 /** 0233 * Optimized function that only computes the index of a string 0234 * removes the overhead of the IndexedString ref counting 0235 */ 0236 static uint indexForString(const char* str, unsigned short length, uint hash = 0); 0237 static uint indexForString(const QString& str, uint hash = 0); 0238 0239 private: 0240 uint m_index = 0; 0241 }; 0242 0243 // the following function would need to be exported in case you'd remove the inline keyword. 0244 inline uint qHash(const KDevelop::IndexedString& str) 0245 { 0246 return str.index(); 0247 } 0248 } 0249 0250 /** 0251 * qDebug() stream operator. Writes the string to the debug output. 0252 */ 0253 KDEVPLATFORMSERIALIZATION_EXPORT QDebug operator<<(QDebug s, const KDevelop::IndexedString& string); 0254 0255 Q_DECLARE_METATYPE(KDevelop::IndexedString) 0256 Q_DECLARE_TYPEINFO(KDevelop::IndexedString, Q_MOVABLE_TYPE); 0257 0258 #endif