File indexing completed on 2024-05-05 04:38:09

0001 /*
0002     SPDX-FileCopyrightText: 2008 David Nolden <david.nolden.kdevelop@art-master.de>
0003     SPDX-FileCopyrightText: 2016 Milian Wolff <mail@milianw.de>
0004 
0005     SPDX-License-Identifier: GPL-2.0-or-later
0006 */
0007 
0008 #include "indexedstring.h"
0009 
0010 #include "itemrepository.h"
0011 #include "referencecounting.h"
0012 #include "repositorymanager.h"
0013 
0014 #include <utility>
0015 
0016 using namespace KDevelop;
0017 
0018 namespace {
0019 struct IndexedStringData
0020 {
0021     unsigned short length;
0022     uint refCount;
0023 
0024     IndexedStringData& operator=(const IndexedStringData& rhs) = delete;
0025 
0026     uint itemSize() const
0027     {
0028         return sizeof(IndexedStringData) + length;
0029     }
0030 
0031     uint hash() const
0032     {
0033         IndexedString::RunningHash running;
0034         const char* str = reinterpret_cast<const char*>(this) + sizeof(IndexedStringData);
0035         for (int a = length - 1; a >= 0; --a) {
0036             running.append(*str);
0037             ++str;
0038         }
0039 
0040         return running.hash;
0041     }
0042 };
0043 
0044 struct IndexedStringRepositoryItemRequest
0045 {
0046     //The text is supposed to be utf8 encoded
0047     IndexedStringRepositoryItemRequest(const char* text, uint hash, unsigned short length)
0048         : m_hash(hash)
0049         , m_length(length)
0050         , m_text(text)
0051     {
0052     }
0053 
0054     enum {
0055         AverageSize = 10 //This should be the approximate average size of an Item
0056     };
0057 
0058     using HashType = uint;
0059 
0060     //Should return the hash-value associated with this request(For example the hash of a string)
0061     HashType hash() const
0062     {
0063         return m_hash;
0064     }
0065 
0066     //Should return the size of an item created with createItem
0067     uint itemSize() const
0068     {
0069         return sizeof(IndexedStringData) + m_length;
0070     }
0071 
0072     //Should create an item where the information of the requested item is permanently stored. The pointer
0073     //@param item equals an allocated range with the size of itemSize().
0074     void createItem(IndexedStringData* item) const
0075     {
0076         item->length = m_length;
0077         item->refCount = 0;
0078         void* itemText = reinterpret_cast<void*>(item + 1);
0079         memcpy(itemText, m_text, m_length);
0080     }
0081 
0082     static void destroy(IndexedStringData* item, AbstractItemRepository&)
0083     {
0084         Q_UNUSED(item);
0085         //Nothing to do here (The object is not intelligent)
0086     }
0087 
0088     static bool persistent(const IndexedStringData* item)
0089     {
0090         return ( bool )item->refCount;
0091     }
0092 
0093     //Should return whether the here requested item equals the given item
0094     bool equals(const IndexedStringData* item) const
0095     {
0096         return item->length == m_length && (memcmp(++item, m_text, m_length) == 0);
0097     }
0098 
0099     uint m_hash;
0100     unsigned short m_length;
0101     const char* m_text;
0102 };
0103 
0104 inline const char* c_strFromItem(const IndexedStringData* item)
0105 {
0106     return reinterpret_cast<const char*>(item + 1);
0107 }
0108 
0109 ///@param item must be valid(nonzero)
0110 inline QString stringFromItem(const IndexedStringData* item)
0111 {
0112     return QString::fromUtf8(c_strFromItem(item), item->length);
0113 }
0114 
0115 inline QByteArray arrayFromItem(const IndexedStringData* item)
0116 {
0117     return QByteArray(c_strFromItem(item), item->length);
0118 }
0119 
/// Whether @p index encodes a single character directly: this is the case
/// when all of its upper 16 bits are set.
inline bool isSingleCharIndex(uint index)
{
    return (index >> 16) == 0xffffu;
}
0124 
/// Encodes the single character @p c as an index: the upper 16 bits are all
/// set (the marker checked by isSingleCharIndex()), bits 8-15 are zero and
/// the low byte holds the character.
inline uint charToIndex(char c)
{
    // Go through unsigned char first: where plain char is signed, a byte
    // >= 0x80 would otherwise be sign-extended by integral promotion and set
    // bits 8-15 as well, making the index depend on the platform's char
    // signedness. indexToChar() only reads the low byte, so decoding is
    // unaffected by this.
    return 0xffff0000u | static_cast<unsigned char>(c);
}
0129 
0130 inline char indexToChar(uint index)
0131 {
0132     Q_ASSERT(isSingleCharIndex(index));
0133     return static_cast<char>(index & 0xff);
0134 }
0135 
/// Repository type used for the strings: it holds IndexedStringData items
/// and is queried through IndexedStringRepositoryItemRequest.
/// NOTE(review): the meaning of the trailing `false` template argument is
/// defined by ItemRepository - confirm there.
using IndexedStringRepository = ItemRepository<IndexedStringData, IndexedStringRepositoryItemRequest, false>;
0137 }
0138 
0139 namespace KDevelop
0140 {
0141 template<>
0142 class ItemRepositoryFor<IndexedString>
0143 {
0144     friend struct LockedItemRepository;
0145     static IndexedStringRepository& repo()
0146     {
0147         static QMutex mutex;
0148         static RepositoryManager<IndexedStringRepository, true, false> manager { QStringLiteral("String Index"),
0149                                                                                  &mutex };
0150         return *manager.repository();
0151     }
0152 };
0153 }
0154 
0155 namespace
0156 {
0157 class ReferenceCountChanger
0158 {
0159 public:
0160     static ReferenceCountChanger increase(unsigned index)
0161     {
0162         return {index, 1};
0163     }
0164     static ReferenceCountChanger decrease(unsigned index)
0165     {
0166         return {index, static_cast<Summand>(-1)}; // unsigned integer overflow is fine
0167     }
0168 
0169     void editRepo() const
0170     {
0171         if (m_index && !isSingleCharIndex(m_index)) {
0172             LockedItemRepository::write<IndexedString>(*this);
0173         }
0174     }
0175 
0176     void operator()(IndexedStringRepository& repo) const
0177     {
0178         repo.dynamicItemFromIndexSimple(m_index)->refCount += m_summand;
0179     }
0180 
0181 private:
0182     using Summand = decltype(IndexedStringData::refCount);
0183 
0184     ReferenceCountChanger(unsigned i, Summand s)
0185         : m_index{i}
0186         , m_summand{s}
0187     {}
0188 
0189     unsigned m_index;
0190     Summand m_summand;
0191 };
0192 inline void ref(unsigned index)
0193 {
0194     ReferenceCountChanger::increase(index).editRepo();
0195 }
0196 inline void deref(unsigned index)
0197 {
0198     ReferenceCountChanger::decrease(index).editRepo();
0199 }
0200 }
0201 
0202 ///@param str must be a utf8 encoded string, does not need to be 0-terminated.
0203 ///@param length must be its length in bytes.
0204 IndexedString::IndexedString(const char* str, unsigned short length, uint hash)
0205 {
0206     if (!length) {
0207         m_index = 0;
0208     } else if (length == 1) {
0209         m_index = charToIndex(str[0]);
0210     } else {
0211         const auto request = IndexedStringRepositoryItemRequest(str, hash ? hash : hashString(str, length), length);
0212         bool refcount = shouldDoDUChainReferenceCounting(this);
0213         m_index = LockedItemRepository::write<IndexedString>([request, refcount](IndexedStringRepository& repo) {
0214             auto index = repo.index(request);
0215             if (refcount) {
0216                 ReferenceCountChanger::increase(index)(repo);
0217             }
0218             return index;
0219         });
0220     }
0221 }
0222 
0223 IndexedString::IndexedString(char c)
0224     : m_index(charToIndex(c))
0225 {}
0226 
0227 IndexedString::IndexedString(const QUrl& url)
0228     : IndexedString(url.isLocalFile() ? url.toLocalFile() : url.toString())
0229 {
0230     Q_ASSERT(url.isEmpty() || !url.isRelative());
0231 #if !defined(QT_NO_DEBUG)
0232     if (url != url.adjusted(QUrl::NormalizePathSegments)) {
0233         qWarning() << "wrong url" << url << url.adjusted(QUrl::NormalizePathSegments);
0234     }
0235 #endif
0236     Q_ASSERT(url == url.adjusted(QUrl::NormalizePathSegments));
0237 }
0238 
/// Constructs the string from @p string by converting it to utf8 and
/// delegating to another constructor with the resulting byte array.
IndexedString::IndexedString(QStringView string)
    : IndexedString(string.toUtf8())
{}
0242 
0243 IndexedString::IndexedString(const char* str)
0244     : IndexedString(str, str ? qstrlen(str) : 0)
0245 {}
0246 
0247 IndexedString::IndexedString(const QByteArray& str)
0248     : IndexedString(str.constData(), str.length())
0249 {}
0250 
0251 // NOTE: the definitions of ref() and deref() are so complex that they can throw exceptions
0252 // for many reasons. Yet the functions below, which call ref() and/or deref(), are
0253 // implicitly (the destructor) or explicitly (the rest) noexcept. The noexcept-ness of
0254 // these functions is important for correctness and performance. This is safe at the moment,
0255 // because the entire KDevPlatformSerialization library, that contains IndexedString, is
0256 // compiled with exceptions disabled (-fno-exceptions), which already prevents exception
0257 // propagation to a caller of any non-inline function in this library.
0258 
0259 IndexedString::~IndexedString()
0260 {
0261     if (shouldDoDUChainReferenceCounting(this)) {
0262         deref(m_index);
0263     }
0264 }
0265 
0266 IndexedString::IndexedString(const IndexedString& rhs) noexcept
0267     : m_index(rhs.m_index)
0268 {
0269     if (shouldDoDUChainReferenceCounting(this)) {
0270         ref(m_index);
0271     }
0272 }
0273 
0274 IndexedString& IndexedString::operator=(const IndexedString& rhs) noexcept
0275 {
0276     if (m_index == rhs.m_index) {
0277         return *this;
0278     }
0279 
0280     if (shouldDoDUChainReferenceCounting(this)) {
0281         deref(m_index);
0282         ref(rhs.m_index);
0283     }
0284 
0285     m_index = rhs.m_index;
0286     return *this;
0287 }
0288 
0289 namespace KDevelop {
0290 void swap(IndexedString& a, IndexedString& b) noexcept
0291 {
0292     using std::swap;
0293 
0294     if (a.m_index == b.m_index) {
0295         return;
0296     }
0297     swap(a.m_index, b.m_index);
0298 
0299     const bool aRc = shouldDoDUChainReferenceCounting(&a);
0300     const bool bRc = shouldDoDUChainReferenceCounting(&b);
0301 
0302     if (aRc == bRc) {
0303         return;
0304     }
0305 
0306     auto noLongerRefCountedIndex = b.m_index;
0307     auto newlyRefCountedIndex = a.m_index;
0308     if (bRc) {
0309         swap(noLongerRefCountedIndex, newlyRefCountedIndex);
0310     }
0311     deref(noLongerRefCountedIndex);
0312     ref(newlyRefCountedIndex);
0313 }
0314 }
0315 
0316 QUrl IndexedString::toUrl() const
0317 {
0318     if (isEmpty()) {
0319         return {};
0320     }
0321     QUrl ret = QUrl::fromUserInput(str());
0322     Q_ASSERT(!ret.isRelative());
0323     return ret;
0324 }
0325 
0326 QString IndexedString::str() const
0327 {
0328     if (!m_index) {
0329         return QString();
0330     } else if (isSingleCharIndex(m_index)) {
0331         return QString(QLatin1Char(indexToChar(m_index)));
0332     } else {
0333         const uint index = m_index;
0334         return LockedItemRepository::read<IndexedString>([index](const IndexedStringRepository& repo) {
0335             return stringFromItem(repo.itemFromIndex(index));
0336         });
0337     }
0338 }
0339 
/// Returns the length of this string, as computed by lengthFromIndex() for
/// this object's index.
int IndexedString::length() const
{
    return lengthFromIndex(m_index);
}
0344 
0345 int IndexedString::lengthFromIndex(uint index)
0346 {
0347     if (!index) {
0348         return 0;
0349     } else if (isSingleCharIndex(index)) {
0350         return 1;
0351     } else {
0352         return LockedItemRepository::read<IndexedString>([index](const IndexedStringRepository& repo) {
0353             return repo.itemFromIndex(index)->length;
0354         });
0355     }
0356 }
0357 
0358 const char* IndexedString::c_str() const
0359 {
0360     if (!m_index) {
0361         return nullptr;
0362     } else if (isSingleCharIndex(m_index)) {
0363 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
0364         const uint offset = 0;
0365 #else
0366         const uint offset = 3;
0367 #endif
0368         return reinterpret_cast<const char*>(&m_index) + offset;
0369     } else {
0370         const uint index = m_index;
0371         return LockedItemRepository::read<IndexedString>([index](const IndexedStringRepository& repo) {
0372             return c_strFromItem(repo.itemFromIndex(index));
0373         });
0374     }
0375 }
0376 
0377 QByteArray IndexedString::byteArray() const
0378 {
0379     if (!m_index) {
0380         return QByteArray();
0381     } else if (isSingleCharIndex(m_index)) {
0382         return QByteArray(1, indexToChar(m_index));
0383     } else {
0384         const uint index = m_index;
0385         return LockedItemRepository::read<IndexedString>([index](const IndexedStringRepository& repo) {
0386             return arrayFromItem(repo.itemFromIndex(index));
0387         });
0388     }
0389 }
0390 
0391 uint IndexedString::hashString(const char* str, unsigned short length)
0392 {
0393     RunningHash running;
0394     for (int a = length - 1; a >= 0; --a) {
0395         running.append(*str);
0396         ++str;
0397     }
0398 
0399     return running.hash;
0400 }
0401 
0402 uint IndexedString::indexForString(const char* str, short unsigned length, uint hash)
0403 {
0404     if (!length) {
0405         return 0;
0406     } else if (length == 1) {
0407         return charToIndex(str[0]);
0408     } else {
0409         const auto request = IndexedStringRepositoryItemRequest(str, hash ? hash : hashString(str, length), length);
0410         return LockedItemRepository::write<IndexedString>([request](IndexedStringRepository& repo) {
0411             return repo.index(request);
0412         });
0413     }
0414 }
0415 
0416 uint IndexedString::indexForString(const QString& str, uint hash)
0417 {
0418     const QByteArray array(str.toUtf8());
0419     return indexForString(array.constBegin(), array.size(), hash);
0420 }
0421 
0422 QDebug operator<<(QDebug s, const IndexedString& string)
0423 {
0424     s.nospace() << string.str();
0425     return s.space();
0426 }