File indexing completed on 2024-05-05 04:38:09
0001 /* 0002 SPDX-FileCopyrightText: 2008 David Nolden <david.nolden.kdevelop@art-master.de> 0003 SPDX-FileCopyrightText: 2016 Milian Wolff <mail@milianw.de> 0004 0005 SPDX-License-Identifier: GPL-2.0-or-later 0006 */ 0007 0008 #include "indexedstring.h" 0009 0010 #include "itemrepository.h" 0011 #include "referencecounting.h" 0012 #include "repositorymanager.h" 0013 0014 #include <utility> 0015 0016 using namespace KDevelop; 0017 0018 namespace { 0019 struct IndexedStringData 0020 { 0021 unsigned short length; 0022 uint refCount; 0023 0024 IndexedStringData& operator=(const IndexedStringData& rhs) = delete; 0025 0026 uint itemSize() const 0027 { 0028 return sizeof(IndexedStringData) + length; 0029 } 0030 0031 uint hash() const 0032 { 0033 IndexedString::RunningHash running; 0034 const char* str = reinterpret_cast<const char*>(this) + sizeof(IndexedStringData); 0035 for (int a = length - 1; a >= 0; --a) { 0036 running.append(*str); 0037 ++str; 0038 } 0039 0040 return running.hash; 0041 } 0042 }; 0043 0044 struct IndexedStringRepositoryItemRequest 0045 { 0046 //The text is supposed to be utf8 encoded 0047 IndexedStringRepositoryItemRequest(const char* text, uint hash, unsigned short length) 0048 : m_hash(hash) 0049 , m_length(length) 0050 , m_text(text) 0051 { 0052 } 0053 0054 enum { 0055 AverageSize = 10 //This should be the approximate average size of an Item 0056 }; 0057 0058 using HashType = uint; 0059 0060 //Should return the hash-value associated with this request(For example the hash of a string) 0061 HashType hash() const 0062 { 0063 return m_hash; 0064 } 0065 0066 //Should return the size of an item created with createItem 0067 uint itemSize() const 0068 { 0069 return sizeof(IndexedStringData) + m_length; 0070 } 0071 0072 //Should create an item where the information of the requested item is permanently stored. The pointer 0073 //@param item equals an allocated range with the size of itemSize(). 0074 void createItem(IndexedStringData* item) const 0075 { 0076 item->length = m_length; 0077 item->refCount = 0; 0078 void* itemText = reinterpret_cast<void*>(item + 1); 0079 memcpy(itemText, m_text, m_length); 0080 } 0081 0082 static void destroy(IndexedStringData* item, AbstractItemRepository&) 0083 { 0084 Q_UNUSED(item); 0085 //Nothing to do here (The object is not intelligent) 0086 } 0087 0088 static bool persistent(const IndexedStringData* item) 0089 { 0090 return ( bool )item->refCount; 0091 } 0092 0093 //Should return whether the here requested item equals the given item 0094 bool equals(const IndexedStringData* item) const 0095 { 0096 return item->length == m_length && (memcmp(++item, m_text, m_length) == 0); 0097 } 0098 0099 uint m_hash; 0100 unsigned short m_length; 0101 const char* m_text; 0102 }; 0103 0104 inline const char* c_strFromItem(const IndexedStringData* item) 0105 { 0106 return reinterpret_cast<const char*>(item + 1); 0107 } 0108 0109 ///@param item must be valid(nonzero) 0110 inline QString stringFromItem(const IndexedStringData* item) 0111 { 0112 return QString::fromUtf8(c_strFromItem(item), item->length); 0113 } 0114 0115 inline QByteArray arrayFromItem(const IndexedStringData* item) 0116 { 0117 return QByteArray(c_strFromItem(item), item->length); 0118 } 0119 0120 inline bool isSingleCharIndex(uint index) 0121 { 0122 return (index & 0xffff0000) == 0xffff0000; 0123 } 0124 0125 inline uint charToIndex(char c) 0126 { 0127 return 0xffff0000 | c; 0128 } 0129 0130 inline char indexToChar(uint index) 0131 { 0132 Q_ASSERT(isSingleCharIndex(index)); 0133 return static_cast<char>(index & 0xff); 0134 } 0135 0136 using IndexedStringRepository = ItemRepository<IndexedStringData, IndexedStringRepositoryItemRequest, false>; 0137 } 0138 0139 namespace KDevelop 0140 { 0141 template<> 0142 class ItemRepositoryFor<IndexedString> 0143 { 0144 friend struct LockedItemRepository; 0145 static IndexedStringRepository& repo() 0146 { 0147 static QMutex mutex; 0148 static RepositoryManager<IndexedStringRepository, true, false> manager { QStringLiteral("String Index"), 0149 &mutex }; 0150 return *manager.repository(); 0151 } 0152 }; 0153 } 0154 0155 namespace 0156 { 0157 class ReferenceCountChanger 0158 { 0159 public: 0160 static ReferenceCountChanger increase(unsigned index) 0161 { 0162 return {index, 1}; 0163 } 0164 static ReferenceCountChanger decrease(unsigned index) 0165 { 0166 return {index, static_cast<Summand>(-1)}; // unsigned integer overflow is fine 0167 } 0168 0169 void editRepo() const 0170 { 0171 if (m_index && !isSingleCharIndex(m_index)) { 0172 LockedItemRepository::write<IndexedString>(*this); 0173 } 0174 } 0175 0176 void operator()(IndexedStringRepository& repo) const 0177 { 0178 repo.dynamicItemFromIndexSimple(m_index)->refCount += m_summand; 0179 } 0180 0181 private: 0182 using Summand = decltype(IndexedStringData::refCount); 0183 0184 ReferenceCountChanger(unsigned i, Summand s) 0185 : m_index{i} 0186 , m_summand{s} 0187 {} 0188 0189 unsigned m_index; 0190 Summand m_summand; 0191 }; 0192 inline void ref(unsigned index) 0193 { 0194 ReferenceCountChanger::increase(index).editRepo(); 0195 } 0196 inline void deref(unsigned index) 0197 { 0198 ReferenceCountChanger::decrease(index).editRepo(); 0199 } 0200 } 0201 0202 ///@param str must be a utf8 encoded string, does not need to be 0-terminated. 0203 ///@param length must be its length in bytes. 0204 IndexedString::IndexedString(const char* str, unsigned short length, uint hash) 0205 { 0206 if (!length) { 0207 m_index = 0; 0208 } else if (length == 1) { 0209 m_index = charToIndex(str[0]); 0210 } else { 0211 const auto request = IndexedStringRepositoryItemRequest(str, hash ? hash : hashString(str, length), length); 0212 bool refcount = shouldDoDUChainReferenceCounting(this); 0213 m_index = LockedItemRepository::write<IndexedString>([request, refcount](IndexedStringRepository& repo) { 0214 auto index = repo.index(request); 0215 if (refcount) { 0216 ReferenceCountChanger::increase(index)(repo); 0217 } 0218 return index; 0219 }); 0220 } 0221 } 0222 0223 IndexedString::IndexedString(char c) 0224 : m_index(charToIndex(c)) 0225 {} 0226 0227 IndexedString::IndexedString(const QUrl& url) 0228 : IndexedString(url.isLocalFile() ? url.toLocalFile() : url.toString()) 0229 { 0230 Q_ASSERT(url.isEmpty() || !url.isRelative()); 0231 #if !defined(QT_NO_DEBUG) 0232 if (url != url.adjusted(QUrl::NormalizePathSegments)) { 0233 qWarning() << "wrong url" << url << url.adjusted(QUrl::NormalizePathSegments); 0234 } 0235 #endif 0236 Q_ASSERT(url == url.adjusted(QUrl::NormalizePathSegments)); 0237 } 0238 0239 IndexedString::IndexedString(QStringView string) 0240 : IndexedString(string.toUtf8()) 0241 {} 0242 0243 IndexedString::IndexedString(const char* str) 0244 : IndexedString(str, str ? qstrlen(str) : 0) 0245 {} 0246 0247 IndexedString::IndexedString(const QByteArray& str) 0248 : IndexedString(str.constData(), str.length()) 0249 {} 0250 0251 // NOTE: the definitions of ref() and deref() are so complex that they can throw exceptions 0252 // for many reasons. Yet the functions below, which call ref() and/or deref(), are 0253 // implicitly (the destructor) or explicitly (the rest) noexcept. The noexcept-ness of 0254 // these functions is important for correctness and performance. This is safe at the moment, 0255 // because the entire KDevPlatformSerialization library, that contains IndexedString, is 0256 // compiled with exceptions disabled (-fno-exceptions), which already prevents exception 0257 // propagation to a caller of any non-inline function in this library. 0258 0259 IndexedString::~IndexedString() 0260 { 0261 if (shouldDoDUChainReferenceCounting(this)) { 0262 deref(m_index); 0263 } 0264 } 0265 0266 IndexedString::IndexedString(const IndexedString& rhs) noexcept 0267 : m_index(rhs.m_index) 0268 { 0269 if (shouldDoDUChainReferenceCounting(this)) { 0270 ref(m_index); 0271 } 0272 } 0273 0274 IndexedString& IndexedString::operator=(const IndexedString& rhs) noexcept 0275 { 0276 if (m_index == rhs.m_index) { 0277 return *this; 0278 } 0279 0280 if (shouldDoDUChainReferenceCounting(this)) { 0281 deref(m_index); 0282 ref(rhs.m_index); 0283 } 0284 0285 m_index = rhs.m_index; 0286 return *this; 0287 } 0288 0289 namespace KDevelop { 0290 void swap(IndexedString& a, IndexedString& b) noexcept 0291 { 0292 using std::swap; 0293 0294 if (a.m_index == b.m_index) { 0295 return; 0296 } 0297 swap(a.m_index, b.m_index); 0298 0299 const bool aRc = shouldDoDUChainReferenceCounting(&a); 0300 const bool bRc = shouldDoDUChainReferenceCounting(&b); 0301 0302 if (aRc == bRc) { 0303 return; 0304 } 0305 0306 auto noLongerRefCountedIndex = b.m_index; 0307 auto newlyRefCountedIndex = a.m_index; 0308 if (bRc) { 0309 swap(noLongerRefCountedIndex, newlyRefCountedIndex); 0310 } 0311 deref(noLongerRefCountedIndex); 0312 ref(newlyRefCountedIndex); 0313 } 0314 } 0315 0316 QUrl IndexedString::toUrl() const 0317 { 0318 if (isEmpty()) { 0319 return {}; 0320 } 0321 QUrl ret = QUrl::fromUserInput(str()); 0322 Q_ASSERT(!ret.isRelative()); 0323 return ret; 0324 } 0325 0326 QString IndexedString::str() const 0327 { 0328 if (!m_index) { 0329 return QString(); 0330 } else if (isSingleCharIndex(m_index)) { 0331 return QString(QLatin1Char(indexToChar(m_index))); 0332 } else { 0333 const uint index = m_index; 0334 return LockedItemRepository::read<IndexedString>([index](const IndexedStringRepository& repo) { 0335 return stringFromItem(repo.itemFromIndex(index)); 0336 }); 0337 } 0338 } 0339 0340 int IndexedString::length() const 0341 { 0342 return lengthFromIndex(m_index); 0343 } 0344 0345 int IndexedString::lengthFromIndex(uint index) 0346 { 0347 if (!index) { 0348 return 0; 0349 } else if (isSingleCharIndex(index)) { 0350 return 1; 0351 } else { 0352 return LockedItemRepository::read<IndexedString>([index](const IndexedStringRepository& repo) { 0353 return repo.itemFromIndex(index)->length; 0354 }); 0355 } 0356 } 0357 0358 const char* IndexedString::c_str() const 0359 { 0360 if (!m_index) { 0361 return nullptr; 0362 } else if (isSingleCharIndex(m_index)) { 0363 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN 0364 const uint offset = 0; 0365 #else 0366 const uint offset = 3; 0367 #endif 0368 return reinterpret_cast<const char*>(&m_index) + offset; 0369 } else { 0370 const uint index = m_index; 0371 return LockedItemRepository::read<IndexedString>([index](const IndexedStringRepository& repo) { 0372 return c_strFromItem(repo.itemFromIndex(index)); 0373 }); 0374 } 0375 } 0376 0377 QByteArray IndexedString::byteArray() const 0378 { 0379 if (!m_index) { 0380 return QByteArray(); 0381 } else if (isSingleCharIndex(m_index)) { 0382 return QByteArray(1, indexToChar(m_index)); 0383 } else { 0384 const uint index = m_index; 0385 return LockedItemRepository::read<IndexedString>([index](const IndexedStringRepository& repo) { 0386 return arrayFromItem(repo.itemFromIndex(index)); 0387 }); 0388 } 0389 } 0390 0391 uint IndexedString::hashString(const char* str, unsigned short length) 0392 { 0393 RunningHash running; 0394 for (int a = length - 1; a >= 0; --a) { 0395 running.append(*str); 0396 ++str; 0397 } 0398 0399 return running.hash; 0400 } 0401 0402 uint IndexedString::indexForString(const char* str, short unsigned length, uint hash) 0403 { 0404 if (!length) { 0405 return 0; 0406 } else if (length == 1) { 0407 return charToIndex(str[0]); 0408 } else { 0409 const auto request = IndexedStringRepositoryItemRequest(str, hash ? hash : hashString(str, length), length); 0410 return LockedItemRepository::write<IndexedString>([request](IndexedStringRepository& repo) { 0411 return repo.index(request); 0412 }); 0413 } 0414 } 0415 0416 uint IndexedString::indexForString(const QString& str, uint hash) 0417 { 0418 const QByteArray array(str.toUtf8()); 0419 return indexForString(array.constBegin(), array.size(), hash); 0420 } 0421 0422 QDebug operator<<(QDebug s, const IndexedString& string) 0423 { 0424 s.nospace() << string.str(); 0425 return s.space(); 0426 }