File indexing completed on 2024-05-05 04:38:10

0001 /*
0002     SPDX-FileCopyrightText: 2008 David Nolden <david.nolden.kdevelop@art-master.de>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-only
0005 */
0006 
0007 #ifndef KDEVPLATFORM_ITEMREPOSITORY_H
0008 #define KDEVPLATFORM_ITEMREPOSITORY_H
0009 
0010 #include <QDebug>
0011 #include <QDir>
0012 #include <QFile>
0013 #include <QMutex>
0014 #include <QMutexLocker>
0015 
0016 #include <KMessageBox>
0017 #include <KLocalizedString>
0018 
0019 #include <algorithm>
0020 #include <memory>
0021 #include <type_traits>
0022 
0023 #include "referencecounting.h"
0024 #include "abstractitemrepository.h"
0025 #include "itemrepositoryregistry.h"
0026 
0027 //#define DEBUG_MONSTERBUCKETS
0028 
0029 // #define DEBUG_ITEMREPOSITORY_LOADING
0030 // #define ifDebugInfiniteRecursion(x) x
0031 #define ifDebugInfiniteRecursion(x)
0032 
0033 // #define ifDebugLostSpace(x) x
0034 #define ifDebugLostSpace(x)
0035 // #define DEBUG_INCORRECT_DELETE
0036 
0037 //Makes sure that all items stay reachable through the basic hash
0038 // #define DEBUG_ITEM_REACHABILITY
0039 
0040 ///@todo Dynamic bucket hash size
0041 
0042 #ifdef DEBUG_ITEM_REACHABILITY
0043 #define ENSURE_REACHABLE(bucket) Q_ASSERT(allItemsReachable(bucket));
0044 #define IF_ENSURE_REACHABLE(x) x
0045 #else
0046 #define ENSURE_REACHABLE(bucket)
0047 #define IF_ENSURE_REACHABLE(x)
0048 #endif
0049 
0050 #define ITEMREPOSITORY_USE_MMAP_LOADING
0051 
0052 //Assertion macro that prevents warnings if debugging is disabled
0053 //Only use it to verify values, it should not call any functions, since else the function will even be called in release mode
0054 #ifdef QT_NO_DEBUG
0055 #define VERIFY(X) if (!(X)) {qWarning() << "Failed to verify expression" << # X;}
0056 #else
0057 #define VERIFY(X) Q_ASSERT(X)
0058 #endif
0059 
0060 ///When this is uncommented, a 64-bit test-value is written behind the area an item is allowed to write into before
0061 ///createItem(..) is called, and an assertion triggers when it was changed during createItem(), which means createItem wrote too long.
0062 ///The problem: This temporarily overwrites valid data in the following item, so it will cause serious problems if that data is accessed
0063 ///during the call to createItem().
0064 // #define DEBUG_WRITING_EXTENTS
0065 
0066 class TestItemRepository;
0067 class BenchItemRepository;
0068 
0069 namespace KDevelop {
0070 namespace ItemRepositoryUtils {
0071 class FileMap
0072 {
0073     Q_DISABLE_COPY_MOVE(FileMap)
0074 public:
0075     explicit FileMap(char* data)
0076         : m_data(data)
0077     {
0078         Q_ASSERT(m_data);
0079     }
0080 
0081     template<typename T>
0082     void readValue(T* to)
0083     {
0084         Q_ASSERT(to);
0085         static_assert(std::is_integral_v<T>);
0086 
0087         *to = *reinterpret_cast<T*>(m_data);
0088         m_data += sizeof(T);
0089     }
0090 
0091     template<typename T>
0092     void readArray(T** to, uint size)
0093     {
0094         Q_ASSERT(to);
0095         static_assert(std::is_integral_v<T>);
0096 
0097         *to = reinterpret_cast<T*>(m_data);
0098         m_data += sizeof(T) * size;
0099     }
0100 
0101     char* current() const
0102     {
0103         return m_data;
0104     }
0105 
0106 private:
0107     char* m_data;
0108 };
0109 
0110 template<class T>
0111 void readValues(QIODevice* file, uint numValues, T* to)
0112 {
0113     Q_ASSERT(file);
0114     Q_ASSERT(to);
0115     static_assert(std::is_integral_v<T>);
0116 
0117     file->read(reinterpret_cast<char*>(to), sizeof(T) * numValues);
0118 }
0119 
0120 template<class T>
0121 void readValue(QIODevice* file, T* to)
0122 {
0123     readValues(file, 1, to);
0124 }
0125 
0126 template<typename T>
0127 void readList(QIODevice* file, QVector<T>* to)
0128 {
0129     Q_ASSERT(to);
0130     readValues(file, to->size(), to->data());
0131 }
0132 
0133 template<class T>
0134 void writeValues(QIODevice* file, uint numValues, const T* from)
0135 {
0136     Q_ASSERT(file);
0137     Q_ASSERT(from);
0138     static_assert(std::is_integral_v<T>);
0139 
0140     file->write(reinterpret_cast<const char*>(from), sizeof(T) * numValues);
0141 }
0142 
0143 template<class T>
0144 void writeValue(QIODevice* file, const T& from)
0145 {
0146     writeValues(file, 1, &from);
0147 }
0148 
0149 template<typename T>
0150 void writeList(QIODevice* file, const QVector<T>& from)
0151 {
0152     writeValues(file, from.size(), from.data());
0153 }
0154 }
0155 /**
0156  * This file implements a generic bucket-based indexing repository, that can be used for example to index strings.
0157  *
0158  * All you need to do is define your item type that you want to store into the repository, and create a request item
0159  * similar to ExampleItemRequest that compares and fills the defined item type.
0160  *
0161  * For example the string repository uses "unsigned short" as item-type, uses that actual value to store the length of the string,
0162  * and uses the space behind to store the actual string content.
0163  *
0164  * @see AbstractItemRepository
0165  * @see ItemRepository
0166  *
0167  * @see ExampleItem
0168  * @see ExampleItemRequest
0169  *
0170  * @see typerepository.h
0171  * @see stringrepository.h
0172  * @see indexedstring.h
0173  */
0174 
0175 enum {
0176     ItemRepositoryBucketSize = 1 << 16,
0177     ItemRepositoryBucketLimit = 1 << 16,
0178     ItemRepositoryBucketLinearGrowthFactor = 10,
0179 };
0180 
0181 /**
0182  * Buckets are the memory-units that are used to store the data in an ItemRepository.
0183  *
0184  * About monster buckets: Normally a bucket has a size of 64kb, but when an item is
0185  * allocated that is larger than that, a "monster bucket" is allocated, which spans the
0186  * space of multiple buckets.
0187  */
0188 template <class Item, class ItemRequest, bool markForReferenceCounting, uint fixedItemSize>
0189 class Bucket
0190 {
0191 public:
0192     enum {
0193         AdditionalSpacePerItem = 2
0194     };
0195     enum {
0196         ObjectMapSize = ((ItemRepositoryBucketSize / ItemRequest::AverageSize) * 3) / 2 + 1,
0197         MaxFreeItemsForHide = 0, //When less than this count of free items in one buckets is reached, the bucket is removed from the global list of buckets with free items
0198         MaxFreeSizeForHide = fixedItemSize ? fixedItemSize : 0, //Only when the largest free size is smaller then this, the bucket is taken from the free list
0199         MinFreeItemsForReuse = 10,//When this count of free items in one bucket is reached, consider re-assigning them to new requests
0200         MinFreeSizeForReuse = ItemRepositoryBucketSize / 20 //When the largest free item is bigger then this, the bucket is automatically added to the free list
0201     };
0202     enum {
0203         NextBucketHashSize = ObjectMapSize, //Affects the average count of bucket-chains that need to be walked in ItemRepository::index. Must be a multiple of ObjectMapSize
0204         DataSize = sizeof(char) + sizeof(unsigned int) * 3 + ItemRepositoryBucketSize + sizeof(short unsigned int) *
0205                    (ObjectMapSize + NextBucketHashSize + 1)
0206     };
0207     enum {
0208         CheckStart = 0xff00ff1,
0209         CheckEnd = 0xfafcfb
0210     };
0211     Bucket()
0212     {
0213     }
0214     Q_DISABLE_COPY_MOVE(Bucket)
0215     ~Bucket()
0216     {
0217         if (m_data != m_mappedData) {
0218             delete[] m_data;
0219             delete[] m_nextBucketHash;
0220             delete[] m_objectMap;
0221         }
0222     }
0223 
0224     void initialize(int monsterBucketExtent, std::unique_ptr<short unsigned int[]> nextBucketHashToRestore = {})
0225     {
0226         if (!m_data) {
0227             m_monsterBucketExtent = monsterBucketExtent;
0228             m_available = ItemRepositoryBucketSize;
0229             m_data = new char[dataSize()];
0230 #ifndef QT_NO_DEBUG
0231             std::fill_n(m_data, dataSize(), 0);
0232 #endif
0233             //The bigger we make the map, the lower the probability of a clash(and thus bad performance). However it increases memory usage.
0234             // NOTE: the `()` at the end of `new int[...]()` ensures the data is zero-initialized, see e.g.:
0235             //       https://stackoverflow.com/questions/7546620/operator-new-initializes-memory-to-zero
0236             m_objectMap = new short unsigned int[ObjectMapSize]();
0237 
0238             if (nextBucketHashToRestore) {
0239                 m_nextBucketHash = nextBucketHashToRestore.release();
0240             } else {
0241                 m_nextBucketHash = new short unsigned int[NextBucketHashSize]();
0242             }
0243 
0244             m_changed = true;
0245             m_dirty = false;
0246             m_lastUsed = 0;
0247         }
0248     }
0249 
0250     void initializeFromMap(char* fileMapData)
0251     {
0252         if (m_data) {
0253             return;
0254         }
0255 
0256         ItemRepositoryUtils::FileMap fileMap(fileMapData);
0257 
0258         fileMap.readValue(&m_monsterBucketExtent);
0259         Q_ASSERT(fileMap.current() - fileMapData == 4);
0260         fileMap.readValue(&m_available);
0261         fileMap.readArray(&m_objectMap, ObjectMapSize);
0262         fileMap.readArray(&m_nextBucketHash, NextBucketHashSize);
0263         fileMap.readValue(&m_largestFreeItem);
0264         fileMap.readValue(&m_freeItemCount);
0265         fileMap.readValue(&m_dirty);
0266 
0267         m_data = fileMap.current();
0268         m_mappedData = m_data;
0269 
0270         m_changed = false;
0271         m_lastUsed = 0;
0272         Q_ASSERT(fileMap.current() - fileMapData == DataSize - ItemRepositoryBucketSize);
0273     }
0274 
0275     void store(QFile* file, size_t offset)
0276     {
0277         using namespace ItemRepositoryUtils;
0278 
0279         if (!m_data)
0280             return;
0281 
0282         if (static_cast<size_t>(file->size()) < offset + (1 + m_monsterBucketExtent) * DataSize)
0283             file->resize(offset + (1 + m_monsterBucketExtent) * DataSize);
0284 
0285         file->seek(offset);
0286 
0287         writeValue(file, m_monsterBucketExtent);
0288         writeValue(file, m_available);
0289         writeValues(file, ObjectMapSize, m_objectMap);
0290         writeValues(file, NextBucketHashSize, m_nextBucketHash);
0291         writeValue(file, m_largestFreeItem);
0292         writeValue(file, m_freeItemCount);
0293         writeValue(file, m_dirty);
0294         writeValues(file, dataSize(), m_data);
0295 
0296         if (static_cast<size_t>(file->pos()) != offset + (1 + m_monsterBucketExtent) * DataSize) {
0297             KMessageBox::error(nullptr, i18n("Failed writing to %1, probably the disk is full", file->fileName()));
0298             abort();
0299         }
0300 
0301         m_changed = false;
0302 #ifdef DEBUG_ITEMREPOSITORY_LOADING
0303         {
0304             file->flush();
0305             file->seek(offset);
0306 
0307             uint available, freeItemCount;
0308             int monsterBucketExtent;
0309             short unsigned int largestFree;
0310             bool dirty;
0311 
0312             short unsigned int* m = new short unsigned int[ObjectMapSize];
0313             short unsigned int* h = new short unsigned int[NextBucketHashSize];
0314 
0315             readValue(file, &monsterBucketExtent);
0316             Q_ASSERT(monsterBucketExtent == m_monsterBucketExtent);
0317 
0318             char* d = new char[dataSize()];
0319 
0320             readValue(file, &available);
0321             readValues(file, ObjectMapSize, m);
0322             readValues(file, NextBucketHashSize, h);
0323             readValue(file, &largestFree);
0324             readValue(file, &freeItemCount);
0325             readValue(file, &dirty);
0326             readValues(file, dataSize(), d);
0327 
0328             Q_ASSERT(available == m_available);
0329             Q_ASSERT(std::equal(d, std::next(d, dataSize()), m_data));
0330             Q_ASSERT(std::equal(m, std::next(m, ObjectMapSize), m_objectMap));
0331             Q_ASSERT(std::equal(h, std::next(h, NextBucketHashSize), m_nextBucketHash));
0332             Q_ASSERT(m_largestFreeItem == largestFree);
0333             Q_ASSERT(m_freeItemCount == freeItemCount);
0334             Q_ASSERT(m_dirty == dirty);
0335 
0336             Q_ASSERT(static_cast<size_t>(file->pos()) == offset + DataSize + m_monsterBucketExtent * DataSize);
0337 
0338             delete[] d;
0339             delete[] m;
0340             delete[] h;
0341         }
0342 #endif
0343     }
0344 
0345     inline char* data()
0346     {
0347         return m_data;
0348     }
0349 
0350     inline uint dataSize() const
0351     {
0352         return ItemRepositoryBucketSize + m_monsterBucketExtent * DataSize;
0353     }
0354 
0355     //Tries to find the index this item has in this bucket, or returns zero if the item isn't there yet.
0356     unsigned short findIndex(const ItemRequest& request) const
0357     {
0358         m_lastUsed = 0;
0359 
0360         unsigned short localHash = request.hash() % ObjectMapSize;
0361         unsigned short index = m_objectMap[localHash];
0362 
0363         unsigned short follower = 0;
0364         //Walk the chain of items with the same local hash
0365         while (index && (follower = followerIndex(index)) && !(request.equals(itemFromIndex(index))))
0366             index = follower;
0367 
0368         if (index && request.equals(itemFromIndex(index))) {
0369             return index; //We have found the item
0370         }
0371 
0372         return 0;
0373     }
0374 
0375     //Tries to get the index within this bucket, or returns zero. Will put the item into the bucket if there is room.
0376     //Created indices will never begin with 0xffff____, so you can use that index-range for own purposes.
0377     unsigned short index(const ItemRequest& request, unsigned int itemSize)
0378     {
0379         m_lastUsed = 0;
0380 
0381         unsigned short localHash = request.hash() % ObjectMapSize;
0382         unsigned short index = m_objectMap[localHash];
0383         unsigned short insertedAt = 0;
0384 
0385         const auto createInsertedItem = [&]() {
0386             const OptionalDUChainReferenceCountingEnabler<markForReferenceCounting> optionalRc(m_data, dataSize());
0387             request.createItem(reinterpret_cast<Item*>(m_data + insertedAt));
0388         };
0389 
0390         unsigned short follower = 0;
0391         //Walk the chain of items with the same local hash
0392         while (index && (follower = followerIndex(index)) && !(request.equals(itemFromIndex(index))))
0393             index = follower;
0394 
0395         if (index && request.equals(itemFromIndex(index)))
0396             return index; //We have found the item
0397 
0398         ifDebugLostSpace(Q_ASSERT(!lostSpace()); )
0399 
0400         prepareChange();
0401 
0402         unsigned int totalSize = itemSize + AdditionalSpacePerItem;
0403 
0404         if (m_monsterBucketExtent) {
0405             /// This is a monster-bucket. Other rules are applied here:
0406             /// Only one item can be allocated, and that must be bigger than the bucket data
0407             if (!m_available)
0408                 return 0;
0409 
0410             Q_ASSERT(totalSize > ItemRepositoryBucketSize);
0411             m_available = 0;
0412 
0413             insertedAt = AdditionalSpacePerItem;
0414             setFollowerIndex(insertedAt, 0);
0415             Q_ASSERT(m_objectMap[localHash] == 0);
0416             m_objectMap[localHash] = insertedAt;
0417             createInsertedItem();
0418             return insertedAt;
0419         }
0420 
0421         //The second condition is needed, else we can get problems with zero-length items and an overflow in insertedAt to zero
0422         if (totalSize > m_available || (!itemSize && totalSize == m_available)) {
0423             //Try finding the smallest freed item that can hold the data
0424             unsigned short currentIndex = m_largestFreeItem;
0425             unsigned short previousIndex = 0;
0426 
0427             unsigned short freeChunkSize = 0;
0428 
0429             ///@todo Achieve this without full iteration
0430             while (currentIndex && freeSize(currentIndex) > itemSize) {
0431                 unsigned short follower = followerIndex(currentIndex);
0432                 if (follower && freeSize(follower) >= itemSize) {
0433                     //The item also fits into the smaller follower, so use that one
0434                     previousIndex = currentIndex;
0435                     currentIndex = follower;
0436                 } else {
0437                     //The item fits into currentIndex, but not into the follower. So use currentIndex
0438                     freeChunkSize = freeSize(currentIndex) - itemSize;
0439 
0440                     //We need 2 bytes to store the free size
0441                     if (freeChunkSize != 0 && freeChunkSize < AdditionalSpacePerItem + 2) {
0442                         //we can not manage the resulting free chunk as a separate item, so we cannot use this position.
0443                         //Just pick the biggest free item, because there we can be sure that
0444                         //either we can manage the split, or we cannot do anything at all in this bucket.
0445 
0446                         freeChunkSize = freeSize(m_largestFreeItem) - itemSize;
0447 
0448                         if (freeChunkSize == 0 || freeChunkSize >= AdditionalSpacePerItem + 2) {
0449                             previousIndex = 0;
0450                             currentIndex = m_largestFreeItem;
0451                         } else {
0452                             currentIndex = 0;
0453                         }
0454                     }
0455                     break;
0456                 }
0457             }
0458 
0459             if (!currentIndex || freeSize(currentIndex) < (totalSize - AdditionalSpacePerItem))
0460                 return 0;
0461 
0462             if (previousIndex)
0463                 setFollowerIndex(previousIndex, followerIndex(currentIndex));
0464             else
0465                 m_largestFreeItem = followerIndex(currentIndex);
0466 
0467             --m_freeItemCount; //Took one free item out of the chain
0468 
0469             ifDebugLostSpace(Q_ASSERT(( uint )lostSpace() == ( uint )(freeSize(currentIndex) + AdditionalSpacePerItem));
0470             )
0471 
0472             if (freeChunkSize) {
0473                 Q_ASSERT(freeChunkSize >= AdditionalSpacePerItem + 2);
0474                 unsigned short freeItemSize = freeChunkSize - AdditionalSpacePerItem;
0475 
0476                 unsigned short freeItemPosition;
0477                 //Insert the resulting free chunk into the list of free items, so we don't lose it
0478                 if (isBehindFreeSpace(currentIndex)) {
0479                     //Create the free item at the beginning of currentIndex, so it can be merged with the free space in front
0480                     freeItemPosition = currentIndex;
0481                     currentIndex += freeItemSize + AdditionalSpacePerItem;
0482                 } else {
0483                     //Create the free item behind currentIndex
0484                     freeItemPosition = currentIndex + itemSize + AdditionalSpacePerItem;
0485                 }
0486                 setFreeSize(freeItemPosition, freeItemSize);
0487                 insertFreeItem(freeItemPosition);
0488             }
0489 
0490             insertedAt = currentIndex;
0491             Q_ASSERT(( bool )m_freeItemCount == ( bool )m_largestFreeItem);
0492         } else {
0493             //We have to insert the item
0494             insertedAt = ItemRepositoryBucketSize - m_available;
0495 
0496             insertedAt += AdditionalSpacePerItem; //Room for the prepended follower-index
0497 
0498             m_available -= totalSize;
0499         }
0500 
0501         ifDebugLostSpace(Q_ASSERT(lostSpace() == totalSize); )
0502 
0503         Q_ASSERT(!index || !followerIndex(index));
0504 
0505         Q_ASSERT(!m_objectMap[localHash] || index);
0506 
0507         if (index)
0508             setFollowerIndex(index, insertedAt);
0509         setFollowerIndex(insertedAt, 0);
0510 
0511         if (m_objectMap[localHash] == 0)
0512             m_objectMap[localHash] = insertedAt;
0513 
0514 #ifdef DEBUG_CREATEITEM_EXTENTS
0515         char* borderBehind = m_data + insertedAt + (totalSize - AdditionalSpacePerItem);
0516 
0517         quint64 oldValueBehind = 0;
0518         if (m_available >= 8) {
0519             oldValueBehind = *( quint64* )borderBehind;
0520             *(( quint64* )borderBehind) = 0xfafafafafafafafaLLU;
0521         }
0522 #endif
0523 
0524         //Last thing we do, because createItem may recursively do even more transformation of the repository
0525         createInsertedItem();
0526 
0527 #ifdef DEBUG_CREATEITEM_EXTENTS
0528         if (m_available >= 8) {
0529             //If this assertion triggers, then the item writes a bigger range than it advertised in
0530             Q_ASSERT(*(( quint64* )borderBehind) == 0xfafafafafafafafaLLU);
0531             *(( quint64* )borderBehind) = oldValueBehind;
0532         }
0533 #endif
0534 
0535         Q_ASSERT(itemFromIndex(insertedAt)->hash() == request.hash());
0536         Q_ASSERT(itemFromIndex(insertedAt)->itemSize() == itemSize);
0537 
0538         ifDebugLostSpace(if (lostSpace()) qDebug() << "lost space:" << lostSpace(); Q_ASSERT(!lostSpace()); )
0539 
0540         return insertedAt;
0541     }
0542 
0543     /// @param modulo Returns whether this bucket contains an item with (hash % modulo) == (item.hash % modulo)
0544     ///               The default-parameter is the size of the next-bucket hash that is used by setNextBucketForHash and nextBucketForHash
0545     /// @note modulo MUST be a multiple of ObjectMapSize, because (b-a) | (x * h1) => (b-a) | h2, where a|b means a is a multiple of b.
0546     ///               This allows efficiently computing the clashes using the local object map hash.
0547     bool hasClashingItem(uint hash, uint modulo)
0548     {
0549         Q_ASSERT(modulo % ObjectMapSize == 0);
0550 
0551         m_lastUsed = 0;
0552 
0553         uint hashMod = hash % modulo;
0554         unsigned short localHash = hash % ObjectMapSize;
0555         unsigned short currentIndex = m_objectMap[localHash];
0556 
0557         if (currentIndex == 0)
0558             return false;
0559 
0560         while (currentIndex) {
0561             uint currentHash = itemFromIndex(currentIndex)->hash();
0562 
0563             Q_ASSERT(currentHash % ObjectMapSize == localHash);
0564 
0565             if (currentHash % modulo == hashMod)
0566                 return true; //Clash
0567             currentIndex = followerIndex(currentIndex);
0568         }
0569         return false;
0570     }
0571 
0572     void countFollowerIndexLengths(uint& usedSlots, uint& lengths, uint& slotCount, uint& longestInBucketFollowerChain)
0573     {
0574         for (uint a = 0; a < ObjectMapSize; ++a) {
0575             unsigned short currentIndex = m_objectMap[a];
0576             ++slotCount;
0577             uint length = 0;
0578 
0579             if (currentIndex) {
0580                 ++usedSlots;
0581 
0582                 while (currentIndex) {
0583                     ++length;
0584                     ++lengths;
0585                     currentIndex = followerIndex(currentIndex);
0586                 }
0587                 if (length > longestInBucketFollowerChain) {
0588 //             qDebug() << "follower-chain at" << a << ":" << length;
0589 
0590                     longestInBucketFollowerChain = length;
0591                 }
0592             }
0593         }
0594     }
0595 
0596     //Returns whether the given item is reachabe within this bucket, through its hash.
0597     bool itemReachable(const Item* item, uint hash) const
0598     {
0599         unsigned short localHash = hash % ObjectMapSize;
0600         unsigned short currentIndex = m_objectMap[localHash];
0601 
0602         while (currentIndex) {
0603             if (itemFromIndex(currentIndex) == item)
0604                 return true;
0605 
0606             currentIndex = followerIndex(currentIndex);
0607         }
0608         return false;
0609     }
0610 
0611     template <class Repository>
0612     void deleteItem(unsigned short index, unsigned int hash, Repository& repository)
0613     {
0614         ifDebugLostSpace(Q_ASSERT(!lostSpace()); )
0615 
0616         m_lastUsed = 0;
0617         prepareChange();
0618 
0619         unsigned int size = itemFromIndex(index)->itemSize();
0620         //Step 1: Remove the item from the data-structures that allow finding it: m_objectMap
0621         unsigned short localHash = hash % ObjectMapSize;
0622         unsigned short currentIndex = m_objectMap[localHash];
0623         unsigned short previousIndex = 0;
0624 
0625         //Fix the follower-link by setting the follower of the previous item to the next one, or updating m_objectMap
0626         while (currentIndex != index) {
0627             previousIndex = currentIndex;
0628             currentIndex = followerIndex(currentIndex);
0629             //If this assertion triggers, the deleted item was not registered under the given hash
0630             Q_ASSERT(currentIndex);
0631         }
0632         Q_ASSERT(currentIndex == index);
0633 
0634         if (!previousIndex)
0635             //The item was directly in the object map
0636             m_objectMap[localHash] = followerIndex(index);
0637         else
0638             setFollowerIndex(previousIndex, followerIndex(index));
0639 
0640         Item* item = const_cast<Item*>(itemFromIndex(index));
0641 
0642         {
0643             const OptionalDUChainReferenceCountingEnabler<markForReferenceCounting> optionalRc(m_data, dataSize());
0644             ItemRequest::destroy(item, repository);
0645         }
0646 
0647 #ifndef QT_NO_DEBUG
0648 #if defined(__GNUC__) && !defined(__INTEL_COMPILER) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 800)
0649 #pragma GCC diagnostic push
0650 #pragma GCC diagnostic ignored "-Wclass-memaccess"
0651 #endif
0652         //For debugging, so we notice the data is wrong
0653         std::fill_n(reinterpret_cast<char*>(item), size, 0);
0654 #if defined(__GNUC__) && !defined(__INTEL_COMPILER) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 800)
0655 #pragma GCC diagnostic pop
0656 #endif
0657 #endif
0658 
0659         if (m_monsterBucketExtent) {
0660             ///This is a monster-bucket. Make it completely empty again.
0661             Q_ASSERT(!m_available);
0662             m_available = ItemRepositoryBucketSize;
0663 
0664             //Items are always inserted into monster-buckets at a fixed position
0665             Q_ASSERT(currentIndex == AdditionalSpacePerItem);
0666             Q_ASSERT(m_objectMap[localHash] == 0);
0667         } else {
0668             ///Put the space into the free-set
0669             setFreeSize(index, size);
0670 
0671             //Try merging the created free item to other free items around, else add it into the free list
0672             insertFreeItem(index);
0673 
0674             if (m_freeItemCount == 1 && freeSize(m_largestFreeItem) + m_available == ItemRepositoryBucketSize) {
0675                 //Everything has been deleted, there is only free space left. Make the bucket empty again,
0676                 //so it can later also be used as a monster-bucket.
0677                 m_available = ItemRepositoryBucketSize;
0678                 m_freeItemCount = 0;
0679                 m_largestFreeItem = 0;
0680             }
0681         }
0682 
0683         Q_ASSERT(( bool )m_freeItemCount == ( bool )m_largestFreeItem);
0684         ifDebugLostSpace(Q_ASSERT(!lostSpace()); )
0685 #ifdef DEBUG_INCORRECT_DELETE
0686         //Make sure the item cannot be found any more
0687         {
0688             unsigned short localHash = hash % ObjectMapSize;
0689             unsigned short currentIndex = m_objectMap[localHash];
0690 
0691             while (currentIndex && currentIndex != index) {
0692                 previousIndex = currentIndex;
0693                 currentIndex = followerIndex(currentIndex);
0694             }
0695             Q_ASSERT(!currentIndex); //The item must not be found
0696         }
0697 #endif
0698 //       Q_ASSERT(canAllocateItem(size));
0699     }
0700 
0701     ///@warning The returned item may be in write-protected memory, so never try doing a const_cast and changing some data
0702     ///         If you need to change something, use dynamicItemFromIndex
0703     ///@warning When using multi-threading, mutex() must be locked as long as you use the returned data
0704     inline const Item* itemFromIndex(unsigned short index) const
0705     {
0706         m_lastUsed = 0;
0707         return reinterpret_cast<Item*>(m_data + index);
0708     }
0709 
0710     bool isEmpty() const
0711     {
0712         return m_available == ItemRepositoryBucketSize;
0713     }
0714 
0715     ///Returns true if this bucket has no nextBucketForHash links
0716     bool noNextBuckets() const
0717     {
0718         if (!m_nextBucketHash)
0719             return true;
0720 
0721         for (int a = 0; a < NextBucketHashSize; ++a)
0722             if (m_nextBucketHash[a])
0723                 return false;
0724 
0725         return true;
0726     }
0727 
0728     std::unique_ptr<short unsigned int[]> takeNextBucketHash()
0729     {
0730         if (m_mappedData == m_data) {
0731             // mmapped data, we need to copy the next bucket hash
0732             auto ret = std::make_unique<short unsigned int[]>(NextBucketHashSize);
0733             std::copy_n(m_nextBucketHash, NextBucketHashSize, ret.get());
0734             std::fill_n(m_nextBucketHash, NextBucketHashSize, 0);
0735             return ret;
0736         }
0737 
0738         // otherwise we can just take the pointer directly
0739         return std::unique_ptr<short unsigned int[]>(std::exchange(m_nextBucketHash, nullptr));
0740     }
0741 
0742     uint available() const
0743     {
0744         return m_available;
0745     }
0746 
0747     uint usedMemory() const
0748     {
0749         return ItemRepositoryBucketSize - m_available;
0750     }
0751 
0752     template <class Visitor>
0753     bool visitAllItems(Visitor& visitor) const
0754     {
0755         m_lastUsed = 0;
0756         for (uint a = 0; a < ObjectMapSize; ++a) {
0757             uint currentIndex = m_objectMap[a];
0758             while (currentIndex) {
0759                 //Get the follower early, so there is no problems when the current
0760                 //index is removed
0761 
0762                 if (!visitor(reinterpret_cast<const Item*>(m_data + currentIndex)))
0763                     return false;
0764 
0765                 currentIndex = followerIndex(currentIndex);
0766             }
0767         }
0768 
0769         return true;
0770     }
0771 
0772     ///Returns whether something was changed
0773     template <class Repository>
0774     int finalCleanup(Repository& repository)
0775     {
0776         int changed = 0;
0777 
0778         while (m_dirty) {
0779             m_dirty = false;
0780 
0781             for (uint a = 0; a < ObjectMapSize; ++a) {
0782                 uint currentIndex = m_objectMap[a];
0783                 while (currentIndex) {
0784                     //Get the follower early, so there is no problems when the current
0785                     //index is removed
0786 
0787                     const Item* item = reinterpret_cast<const Item*>(m_data + currentIndex);
0788 
0789                     if (!ItemRequest::persistent(item)) {
0790                         changed += item->itemSize();
0791                         deleteItem(currentIndex, item->hash(), repository);
0792                         m_dirty = true; //Set to dirty so we re-iterate
0793                         break;
0794                     }
0795 
0796                     currentIndex = followerIndex(currentIndex);
0797                 }
0798             }
0799         }
0800         return changed;
0801     }
0802 
0803     unsigned short nextBucketForHash(uint hash) const
0804     {
0805         m_lastUsed = 0;
0806         return m_nextBucketHash[hash % NextBucketHashSize];
0807     }
0808 
0809     void setNextBucketForHash(unsigned int hash, unsigned short bucket)
0810     {
0811         m_lastUsed = 0;
0812         prepareChange();
0813         m_nextBucketHash[hash % NextBucketHashSize] = bucket;
0814     }
0815 
0816     uint freeItemCount() const
0817     {
0818         return m_freeItemCount;
0819     }
0820 
0821     short unsigned int totalFreeItemsSize() const
0822     {
0823         short unsigned int ret = 0;
0824         short unsigned int currentIndex = m_largestFreeItem;
0825         while (currentIndex) {
0826             ret += freeSize(currentIndex);
0827             currentIndex = followerIndex(currentIndex);
0828         }
0829         return ret;
0830     }
0831 
0832     //Size of the largest item that could be inserted into this bucket
0833     short unsigned int largestFreeSize() const
0834     {
0835         short unsigned int ret = 0;
0836         if (m_largestFreeItem)
0837             ret = freeSize(m_largestFreeItem);
0838         if (m_available > ( uint )(AdditionalSpacePerItem + ( uint )ret)) {
0839             ret = m_available - AdditionalSpacePerItem;
0840             Q_ASSERT(ret == (m_available - AdditionalSpacePerItem));
0841         }
0842         return ret;
0843     }
0844 
0845     bool canAllocateItem(unsigned int size) const
0846     {
0847         short unsigned int currentIndex = m_largestFreeItem;
0848         while (currentIndex) {
0849             short unsigned int currentFree = freeSize(currentIndex);
0850             if (currentFree < size)
0851                 return false;
0852             //Either we need an exact match, or 2 additional bytes to manage the resulting gap
0853             if (size == currentFree || currentFree - size >= AdditionalSpacePerItem + 2)
0854                 return true;
0855             currentIndex = followerIndex(currentIndex);
0856         }
0857 
0858         return false;
0859     }
0860 
0861     void tick() const
0862     {
0863         ++m_lastUsed;
0864     }
0865 
0866     //How many ticks ago the item was last used
0867     int lastUsed() const
0868     {
0869         return m_lastUsed;
0870     }
0871 
0872     //Whether this bucket was changed since it was last stored
0873     bool changed() const
0874     {
0875         return m_changed;
0876     }
0877 
0878     void prepareChange()
0879     {
0880         m_changed = true;
0881         m_dirty = true;
0882         makeDataPrivate();
0883     }
0884 
0885     bool dirty() const
0886     {
0887         return m_dirty;
0888     }
0889 
0890     ///Returns the count of following buckets that were merged onto this buckets data array
0891     int monsterBucketExtent() const
0892     {
0893         return m_monsterBucketExtent;
0894     }
0895 
0896     //Counts together the space that is neither accessible through m_objectMap nor through the free items
0897     uint lostSpace()
0898     {
0899         if (m_monsterBucketExtent)
0900             return 0;
0901 
0902         uint need = ItemRepositoryBucketSize - m_available;
0903         uint found = 0;
0904 
0905         for (uint a = 0; a < ObjectMapSize; ++a) {
0906             uint currentIndex = m_objectMap[a];
0907             while (currentIndex) {
0908                 found += reinterpret_cast<const Item*>(m_data + currentIndex)->itemSize() + AdditionalSpacePerItem;
0909 
0910                 currentIndex = followerIndex(currentIndex);
0911             }
0912         }
0913 
0914         uint currentIndex = m_largestFreeItem;
0915         while (currentIndex) {
0916             found += freeSize(currentIndex) + AdditionalSpacePerItem;
0917 
0918             currentIndex = followerIndex(currentIndex);
0919         }
0920         return need - found;
0921     }
0922 
0923 private:
0924 
0925     void makeDataPrivate()
0926     {
0927         if (m_mappedData == m_data) {
0928             short unsigned int* oldObjectMap = m_objectMap;
0929             short unsigned int* oldNextBucketHash = m_nextBucketHash;
0930 
0931             m_data = new char[dataSize()];
0932             m_objectMap = new short unsigned int[ObjectMapSize];
0933             m_nextBucketHash = new short unsigned int[NextBucketHashSize];
0934 
0935             std::copy_n(m_mappedData, dataSize(), m_data);
0936             std::copy_n(oldObjectMap, ObjectMapSize, m_objectMap);
0937             std::copy_n(oldNextBucketHash, NextBucketHashSize, m_nextBucketHash);
0938         }
0939     }
0940 
0941     ///Merges the given index item, which must have a freeSize() set, to surrounding free items, and inserts the result.
0942     ///The given index itself should not be in the free items chain yet.
0943     ///Returns whether the item was inserted somewhere.
0944     void insertFreeItem(unsigned short index)
0945     {
0946         //If the item-size is fixed, we don't need to do any management. Just keep a list of free items. Items of other size will never be requested.
0947         if (!fixedItemSize) {
0948             unsigned short currentIndex = m_largestFreeItem;
0949             unsigned short previousIndex = 0;
0950 
0951             while (currentIndex) {
0952                 Q_ASSERT(currentIndex != index);
0953 
0954 #ifndef QT_NO_DEBUG
0955                 unsigned short currentFreeSize = freeSize(currentIndex);
0956 #endif
0957 
0958                 ///@todo Achieve this without iterating through all items in the bucket(This is very slow)
0959                 //Merge behind index
0960                 if (currentIndex == index + freeSize(index) + AdditionalSpacePerItem) {
0961                     //Remove currentIndex from the free chain, since it's merged backwards into index
0962                     if (previousIndex && followerIndex(currentIndex))
0963                         Q_ASSERT(freeSize(previousIndex) >= freeSize(followerIndex(currentIndex)));
0964 
0965                     if (previousIndex)
0966                         setFollowerIndex(previousIndex, followerIndex(currentIndex));
0967                     else
0968                         m_largestFreeItem = followerIndex(currentIndex);
0969 
0970                     --m_freeItemCount; //One was removed
0971 
0972                     //currentIndex is directly behind index, touching its space. Merge them.
0973                     setFreeSize(index, freeSize(index) + AdditionalSpacePerItem + freeSize(currentIndex));
0974 
0975                     //Recurse to do even more merging
0976                     insertFreeItem(index);
0977                     return;
0978                 }
0979 
0980                 //Merge before index
0981                 if (index == currentIndex + freeSize(currentIndex) + AdditionalSpacePerItem) {
0982                     if (previousIndex && followerIndex(currentIndex))
0983                         Q_ASSERT(freeSize(previousIndex) >= freeSize(followerIndex(currentIndex)));
0984 
0985                     //Remove currentIndex from the free chain, since insertFreeItem wants
0986                     //it not to be in the chain, and it will be inserted in another place
0987                     if (previousIndex)
0988                         setFollowerIndex(previousIndex, followerIndex(currentIndex));
0989                     else
0990                         m_largestFreeItem = followerIndex(currentIndex);
0991 
0992                     --m_freeItemCount; //One was removed
0993 
0994                     //index is directly behind currentIndex, touching its space. Merge them.
0995                     setFreeSize(currentIndex, freeSize(currentIndex) + AdditionalSpacePerItem + freeSize(index));
0996 
0997                     //Recurse to do even more merging
0998                     insertFreeItem(currentIndex);
0999                     return;
1000                 }
1001 
1002                 previousIndex = currentIndex;
1003                 currentIndex = followerIndex(currentIndex);
1004 #ifndef QT_NO_DEBUG
1005                 if (currentIndex)
1006                     Q_ASSERT(freeSize(currentIndex) <= currentFreeSize);
1007 #endif
1008             }
1009         }
1010         insertToFreeChain(index);
1011     }
1012 
1013     ///Only inserts the item in the correct position into the free chain. index must not be in the chain yet.
1014     void insertToFreeChain(unsigned short index)
1015     {
1016         if (!fixedItemSize) {
1017             ///@todo Use some kind of tree to find the correct position in the chain(This is very slow)
1018             //Insert the free item into the chain opened by m_largestFreeItem
1019             unsigned short currentIndex = m_largestFreeItem;
1020             unsigned short previousIndex = 0;
1021 
1022             unsigned short size = freeSize(index);
1023 
1024             while (currentIndex && freeSize(currentIndex) > size) {
1025                 Q_ASSERT(currentIndex != index); //must not be in the chain yet
1026                 previousIndex = currentIndex;
1027                 currentIndex = followerIndex(currentIndex);
1028             }
1029 
1030             if (currentIndex)
1031                 Q_ASSERT(freeSize(currentIndex) <= size);
1032 
1033             setFollowerIndex(index, currentIndex);
1034 
1035             if (previousIndex) {
1036                 Q_ASSERT(freeSize(previousIndex) >= size);
1037                 setFollowerIndex(previousIndex, index);
1038             } else
1039                 //This item is larger than all already registered free items, or there are none.
1040                 m_largestFreeItem = index;
1041         } else {
1042             Q_ASSERT(freeSize(index) == fixedItemSize);
1043             //When all items have the same size, just prepent to the front.
1044             setFollowerIndex(index, m_largestFreeItem);
1045             m_largestFreeItem = index;
1046         }
1047 
1048         ++m_freeItemCount;
1049     }
1050 
1051     /// Returns true if the given index is right behind free space, and thus can be merged to the free space.
1052     bool isBehindFreeSpace(unsigned short index) const
1053     {
1054         // TODO: Without iteration!
1055         unsigned short currentIndex = m_largestFreeItem;
1056 
1057         while (currentIndex) {
1058             if (index == currentIndex + freeSize(currentIndex) + AdditionalSpacePerItem)
1059                 return true;
1060 
1061             currentIndex = followerIndex(currentIndex);
1062         }
1063         return false;
1064     }
1065 
1066     /// @param index the index of an item @return The index of the next item in the chain of items with a same local hash, or zero
1067     inline unsigned short followerIndex(unsigned short index) const
1068     {
1069         Q_ASSERT(index >= 2);
1070         return *reinterpret_cast<unsigned short*>(m_data + (index - 2));
1071     }
1072 
1073     void setFollowerIndex(unsigned short index, unsigned short follower)
1074     {
1075         Q_ASSERT(index >= 2);
1076         *reinterpret_cast<unsigned short*>(m_data + (index - 2)) = follower;
1077     }
1078     // Only returns the current value if the item is actually free
1079     inline unsigned short freeSize(unsigned short index) const
1080     {
1081         return *reinterpret_cast<unsigned short*>(m_data + index);
1082     }
1083 
1084     //Convenience function to set the free-size, only for freed items
1085     void setFreeSize(unsigned short index, unsigned short size)
1086     {
1087         *reinterpret_cast<unsigned short*>(m_data + index) = size;
1088     }
1089 
1090 private:
1091     int m_monsterBucketExtent = 0; //If this is a monster-bucket, this contains the count of follower-buckets that belong to this one
1092     unsigned int m_available = 0;
1093     char* m_data = nullptr; //Structure of the data: <Position of next item with same hash modulo ItemRepositoryBucketSize>(2 byte), <Item>(item.size() byte)
1094     char* m_mappedData  = nullptr; //Read-only memory-mapped data. If this equals m_data, m_data must not be written
1095     short unsigned int* m_objectMap  = nullptr; //Points to the first object in m_data with (hash % ObjectMapSize) == index. Points to the item itself, so subtract 1 to get the pointer to the next item with same local hash.
1096     short unsigned int m_largestFreeItem  = 0; //Points to the largest item that is currently marked as free, or zero. That one points to the next largest one through followerIndex
1097     unsigned int m_freeItemCount  = 0;
1098 
1099     unsigned short* m_nextBucketHash  = nullptr;
1100 
1101     bool m_dirty = false; //Whether the data was changed since the last finalCleanup
1102     bool m_changed  = false; //Whether this bucket was changed since it was last stored to disk
1103     mutable int m_lastUsed = 0; //How many ticks ago this bucket was last accessed
1104 };
1105 
1106 ///This object needs to be kept alive as long as you change the contents of an item
1107 ///stored in the repository. It is needed to correctly track the reference counting
1108 ///within disk-storage.
1109 template <class Item, bool markForReferenceCounting>
1110 class DynamicItem : public OptionalDUChainReferenceCountingEnabler<markForReferenceCounting>
1111 {
1112 public:
1113     explicit DynamicItem(Item* i, const void* start, unsigned size)
1114         : OptionalDUChainReferenceCountingEnabler<markForReferenceCounting>(start, size)
1115         , m_item{i}
1116     {
1117 //       qDebug() << "enabling" << i << "to" << (void*)(((char*)i)+size);
1118     }
1119 
1120     Item* operator->() const { return m_item; }
1121 
1122 private:
1123     Item* const m_item;
1124 };
1125 
1126 struct ItemRepositoryStatistics {
1127     uint loadedBuckets = -1;
1128     uint currentBucket = -1;
1129     uint usedMemory = -1;
1130     uint loadedMonsterBuckets = -1;
1131     uint usedSpaceForBuckets = -1;
1132     uint freeSpaceInBuckets = -1;
1133     uint lostSpace = -1;
1134     uint freeUnreachableSpace = -1;
1135     uint hashClashedItems = -1;
1136     uint totalItems = -1;
1137     uint emptyBuckets;
1138     uint hashSize = -1; // How big the hash is
1139     uint hashUse = -1; // How many slots in the hash are used
1140     uint averageInBucketHashSize = -1;
1141     uint averageInBucketUsedSlotCount = -1;
1142     float averageInBucketSlotChainLength = -1;
1143     uint longestInBucketChain = -1;
1144 
1145     uint longestNextBucketChain = -1;
1146     uint totalBucketFollowerSlots = -1; // Total count of used slots in the nextBucketForHash structure
1147     float averageNextBucketForHashSequenceLength
1148         = -1; // Average sequence length of a nextBucketForHash sequence(If not empty)
1149 
1150     QString print() const
1151     {
1152         QString ret;
1153         ret += QStringLiteral("loaded buckets: %1 current bucket: %2 used memory: %3 loaded monster buckets: %4")
1154                    .arg(loadedBuckets)
1155                    .arg(currentBucket)
1156                    .arg(usedMemory)
1157                    .arg(loadedMonsterBuckets);
1158         ret += QStringLiteral("\nbucket hash clashed items: %1 total items: %2").arg(hashClashedItems).arg(totalItems);
1159         ret += QStringLiteral("\nused space for buckets: %1 free space in buckets: %2 lost space: %3")
1160                    .arg(usedSpaceForBuckets)
1161                    .arg(freeSpaceInBuckets)
1162                    .arg(lostSpace);
1163         ret += QStringLiteral("\nfree unreachable space: %1 empty buckets: %2")
1164                    .arg(freeUnreachableSpace)
1165                    .arg(emptyBuckets);
1166         ret += QStringLiteral("\nhash size: %1 hash slots used: %2").arg(hashSize).arg(hashUse);
1167         ret += QStringLiteral("\naverage in-bucket hash size: %1 average in-bucket used hash slot count: %2 average "
1168                               "in-bucket slot chain length: %3 longest in-bucket follower chain: %4")
1169                    .arg(averageInBucketHashSize)
1170                    .arg(averageInBucketUsedSlotCount)
1171                    .arg(averageInBucketSlotChainLength)
1172                    .arg(longestInBucketChain);
1173         ret += QStringLiteral("\ntotal count of used next-bucket-for-hash slots: %1 average next-bucket-for-hash "
1174                               "sequence length: %2 longest next-bucket chain: %3")
1175                    .arg(totalBucketFollowerSlots)
1176                    .arg(averageNextBucketForHashSequenceLength)
1177                    .arg(longestNextBucketChain);
1178         return ret;
1179     }
1180     operator QString() const { return print(); }
1181 };
1182 
1183 /**
1184  * The ItemRepository is essentially an on-disk key/value hash map
1185  *
1186  * Access to this structure is assumed to happen in a thread safe manner, e.g.
1187  * by locking a mutex externally. The API can then call itself without having
1188  * to re-lock anything internally, thus is capable to work with a non-recursive mutex.
1189  * If desired, a recursive mutex can be used too though as needed.
1190  *
1191  * @tparam Item See ExampleItem
1192  * @tparam ItemRequest See ExampleReqestItem
1193  * @tparam fixedItemSize When this is true, all inserted items must have the same size.
1194  *                      This greatly simplifies and speeds up the task of managing free items within the buckets.
1195  * @tparam markForReferenceCounting Whether the data within the repository should be marked for reference-counting.
1196  *                                 This costs a bit of performance, but must be enabled if there may be data in the
1197  *                                 repository that does on-disk reference counting, like IndexedString,
1198  *                                 IndexedIdentifier, etc.
1199  * @tparam Mutex The mutex type to use internally. It has to be locked externally before accessing the item repository
1200  *               from multiple threads.
1201  */
1202 
1203 template <class Item, class ItemRequest, bool markForReferenceCounting = true, typename Mutex = QMutex,
1204           uint fixedItemSize = 0, unsigned int targetBucketHashSize = 524288 * 2>
1205 class ItemRepository : public AbstractItemRepository
1206 {
1207     using MyBucket = Bucket<Item, ItemRequest, markForReferenceCounting, fixedItemSize>;
1208 
1209     enum {
1210         //Must be a multiple of Bucket::ObjectMapSize, so Bucket::hasClashingItem can be computed
1211         //Must also be a multiple of Bucket::NextBucketHashSize, for the same reason.(Currently those are same)
1212         bucketHashSize = (targetBucketHashSize / MyBucket::ObjectMapSize) * MyBucket::ObjectMapSize
1213     };
1214 
1215     enum {
1216         BucketStartOffset = sizeof(uint) * 7 + sizeof(short unsigned int) * bucketHashSize //Position in the data where the bucket array starts
1217     };
1218 
1219     Q_DISABLE_COPY_MOVE(ItemRepository)
1220 public:
1221     ///@param registry May be zero, then the repository will not be registered at all. Else, the repository will
1222     /// register itself to that registry.
1223     ///                If this is zero, you have to care about storing the data using store() and/or close() by
1224     ///                yourself. It does not happen automatically. For the global standard registry, the storing/loading
1225     ///                is triggered from within duchain, so you don't need to care about it.
1226     explicit ItemRepository(const QString& repositoryName, Mutex* mutex,
1227                             ItemRepositoryRegistry* registry = &globalItemRepositoryRegistry(),
1228                             uint repositoryVersion = 1)
1229         : m_repositoryName(repositoryName)
1230         , m_repositoryVersion(repositoryVersion)
1231         , m_mutex(mutex)
1232         , m_registry(registry)
1233     {
1234         if (m_registry)
1235             m_registry->registerRepository(this);
1236     }
1237 
1238     ~ItemRepository() override
1239     {
1240         if (m_registry)
1241             m_registry->unRegisterRepository(this);
1242 
1243         close();
1244     }
1245 
1246     ///Unloading of buckets is enabled by default. Use this to disable it. When unloading is enabled, the data
1247     ///gotten from must only itemFromIndex must not be used for a long time.
1248     void setUnloadingEnabled(bool enabled)
1249     {
1250         m_unloadingEnabled = enabled;
1251     }
1252 
1253     ///Returns the index for the given item. If the item is not in the repository yet, it is inserted.
1254     ///The index can never be zero. Zero is reserved for your own usage as invalid
1255     ///@param request Item to retrieve the index from
1256     unsigned int index(const ItemRequest& request)
1257     {
1258         const uint hash = request.hash();
1259         const uint size = request.itemSize();
1260 
1261         // Bucket indexes tracked while walking the bucket chain for this request hash
1262         unsigned short bucketInChainWithSpace = 0;
1263         unsigned short lastBucketWalked = 0;
1264 
1265         const ushort foundIndexInBucket = walkBucketChain(hash, [&](ushort bucketIdx, const MyBucket* bucketPtr) {
1266                 lastBucketWalked = bucketIdx;
1267 
1268                 const ushort found = bucketPtr->findIndex(request);
1269 
1270                 if (!found && !bucketInChainWithSpace && bucketPtr->canAllocateItem(size)) {
1271                     bucketInChainWithSpace = bucketIdx;
1272                 }
1273 
1274                 return found;
1275             });
1276 
1277         if (foundIndexInBucket) {
1278             // 'request' is already present, return the existing index
1279             Q_ASSERT(m_currentBucket);
1280             Q_ASSERT(m_currentBucket < m_buckets.size());
1281             return createIndex(lastBucketWalked, foundIndexInBucket);
1282         }
1283 
1284         /*
1285          * Disclaimer: Writer of comment != writer of code, believe with caution
1286          *
1287          * Requested item does not yet exist. Need to find a place to put it...
1288          *
1289          * First choice is to place it in an existing bucket in the chain for the request hash
1290          * Second choice is to find an existing bucket anywhere with enough space
1291          * Otherwise use m_currentBucket (the latest unused bucket)
1292          *
1293          * If the chosen bucket fails to allocate the item, merge buckets to create a monster (dragon?)
1294          *
1295          * Finally, if the first option failed or the selected bucket failed to allocate, add the
1296          * bucket which the item ended up in to the chain of buckets for the request's hash
1297          */
1298 
1299         m_metaDataChanged = true;
1300 
1301         const bool pickedBucketInChain = bucketInChainWithSpace;
1302         int useBucket = bucketInChainWithSpace;
1303 
1304         if (!pickedBucketInChain) {
1305             //Try finding an existing bucket with deleted space to store the data into
1306             for (int a = 0; a < m_freeSpaceBuckets.size(); ++a) {
1307                 MyBucket* bucketPtr = bucketForIndex(m_freeSpaceBuckets[a]);
1308                 Q_ASSERT(bucketPtr->largestFreeSize());
1309 
1310                 if (bucketPtr->canAllocateItem(size)) {
1311                     //The item fits into the bucket.
1312                     useBucket = m_freeSpaceBuckets[a];
1313                     break;
1314                 }
1315             }
1316         }
1317 
1318         auto advanceToNextBucket = [&]() {
1319             //This should never happen when we picked a bucket for re-use
1320             Q_ASSERT(!pickedBucketInChain);
1321             Q_ASSERT(useBucket == m_currentBucket);
1322             // note: the bucket may be empty and/or in the free list, but it might still be too small
1323             // for a monster bucket request
1324 
1325             ++m_currentBucket;
1326             Q_ASSERT(m_currentBucket < ItemRepositoryBucketLimit);
1327             useBucket = m_currentBucket;
1328         };
1329 
1330         //The item isn't in the repository yet, find a new bucket for it
1331         while (1) {
1332             if (useBucket >= m_buckets.size()) {
1333                 if (m_buckets.size() >= 0xfffe) { //We have reserved the last bucket index 0xffff for special purposes
1334                     //the repository has overflown.
1335                     qWarning() << "Found no room for an item in" << m_repositoryName << "size of the item:" <<
1336                         request.itemSize();
1337                     return 0;
1338                 } else {
1339                     allocateNextBuckets(ItemRepositoryBucketLinearGrowthFactor);
1340                 }
1341             }
1342 
1343             if (!useBucket) {
1344                 Q_ASSERT(m_currentBucket);
1345                 useBucket = m_currentBucket;
1346             }
1347 
1348             // don't put an item into the tail of a monster bucket
1349             if (m_monsterBucketTailMarker[useBucket]) {
1350                 advanceToNextBucket();
1351                 continue;
1352             }
1353 
1354             auto* bucketPtr = bucketForIndex(useBucket);
1355 
1356             ENSURE_REACHABLE(useBucket);
1357             Q_ASSERT_X(!bucketPtr->findIndex(
1358                            request), Q_FUNC_INFO,
1359                        "found item in unexpected bucket, ensure your ItemRequest::equals method is correct. Note: For custom AbstractType's e.g. ensure you have a proper equals() override");
1360 
1361             unsigned short indexInBucket = bucketPtr->index(request, size);
1362 
1363             //If we could not allocate the item in an empty bucket, then we need to create a monster-bucket that
1364             //can hold the data.
1365             if (bucketPtr->isEmpty() && !indexInBucket) {
1366                 ///@todo Move this compound statement into an own function
1367                 uint totalSize = size + MyBucket::AdditionalSpacePerItem;
1368 
1369                 Q_ASSERT((totalSize > ItemRepositoryBucketSize));
1370 
1371                 useBucket = 0;
1372                 //The item did not fit in, we need a monster-bucket(Merge consecutive buckets)
1373                 ///Step one: Search whether we can merge multiple empty buckets in the free-list into one monster-bucket
1374                 int rangeStart = -1;
1375                 int rangeEnd = -1;
1376                 for (int a = 0; a < m_freeSpaceBuckets.size(); ++a) {
1377                     MyBucket* bucketPtr = bucketForIndex(m_freeSpaceBuckets[a]);
1378                     if (bucketPtr->isEmpty()) {
1379                         //This bucket is a candidate for monster-bucket merging
1380                         int index = ( int )m_freeSpaceBuckets[a];
1381                         if (rangeEnd != index) {
1382                             rangeStart = index;
1383                             rangeEnd = index + 1;
1384                         } else {
1385                             ++rangeEnd;
1386                         }
1387                         if (rangeStart != rangeEnd) {
1388                             uint extent = rangeEnd - rangeStart - 1;
1389                             uint totalAvailableSpace = bucketForIndex(rangeStart)->available() +
1390                                                        MyBucket::DataSize* (rangeEnd - rangeStart - 1);
1391                             if (totalAvailableSpace > totalSize) {
1392                                 Q_ASSERT(extent);
1393                                 ///We can merge these buckets into one monster-bucket that can hold the data
1394                                 Q_ASSERT((uint)m_freeSpaceBuckets[a - extent] == (uint)rangeStart);
1395                                 useBucket = rangeStart;
1396                                 convertMonsterBucket(rangeStart, extent);
1397 
1398                                 break;
1399                             }
1400                         }
1401                     }
1402                 }
1403 
1404                 if (!useBucket) {
1405                     //Create a new monster-bucket at the end of the data
1406                     int needMonsterExtent = (totalSize - ItemRepositoryBucketSize) / MyBucket::DataSize + 1;
1407                     Q_ASSERT(needMonsterExtent);
1408                     const auto currentBucketIncrease = needMonsterExtent + 1;
1409                     Q_ASSERT(m_currentBucket);
1410                     if (m_currentBucket + currentBucketIncrease >= m_buckets.size()) {
1411                         allocateNextBuckets(ItemRepositoryBucketLinearGrowthFactor + currentBucketIncrease);
1412                     }
1413                     useBucket = m_currentBucket;
1414 
1415                     convertMonsterBucket(useBucket, needMonsterExtent);
1416                     m_currentBucket += currentBucketIncrease;
1417                     Q_ASSERT(m_currentBucket < ItemRepositoryBucketLimit);
1418                     Q_ASSERT(m_buckets[useBucket]);
1419                     Q_ASSERT(m_buckets[useBucket]->monsterBucketExtent() == needMonsterExtent);
1420                 }
1421                 Q_ASSERT(useBucket);
1422                 bucketPtr = bucketForIndex(useBucket);
1423 
1424                 indexInBucket = bucketPtr->index(request, size);
1425                 Q_ASSERT(indexInBucket);
1426             }
1427 
1428             Q_ASSERT(m_currentBucket);
1429 
1430             if (indexInBucket) {
1431                 ++m_statItemCount;
1432 
1433                 const int previousBucketNumber = lastBucketWalked;
1434                 unsigned short* const bucketHashPosition = m_firstBucketForHash + (hash % bucketHashSize);
1435 
1436                 if (!(*bucketHashPosition)) {
1437                     Q_ASSERT(!previousBucketNumber);
1438                     (*bucketHashPosition) = useBucket;
1439                     ENSURE_REACHABLE(useBucket);
1440                 } else if (!pickedBucketInChain && previousBucketNumber && previousBucketNumber != useBucket) {
1441                     //Should happen rarely
1442                     ++m_statBucketHashClashes;
1443 
1444                     ///Debug: Detect infinite recursion
1445                     ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, previousBucketNumber));
1446                     )
1447 
1448                     //Find the position where the paths of useBucket and *bucketHashPosition intersect, and insert useBucket
1449                     //there. That way, we don't create loops.
1450                     QPair<unsigned int, unsigned int> intersect = hashChainIntersection(*bucketHashPosition, useBucket,
1451                                                                                         hash);
1452 
1453                     Q_ASSERT(m_buckets[previousBucketNumber]->nextBucketForHash(hash) == 0);
1454 
1455                     if (!intersect.second) {
1456                         ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(*bucketHashPosition, hash, useBucket)); )
1457                         ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(useBucket, hash, previousBucketNumber)); )
1458 
1459                         Q_ASSERT(m_buckets[previousBucketNumber]->nextBucketForHash(hash) == 0);
1460 
1461                         m_buckets[previousBucketNumber]->setNextBucketForHash(hash, useBucket);
1462                         ENSURE_REACHABLE(useBucket);
1463                         ENSURE_REACHABLE(previousBucketNumber);
1464 
1465                         ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, useBucket)); )
1466                     } else if (intersect.first) {
1467                         ifDebugInfiniteRecursion(Q_ASSERT(bucketForIndex(intersect.first)->nextBucketForHash(hash) ==
1468                                                           intersect.second); )
1469                         ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(*bucketHashPosition, hash, useBucket)); )
1470                         ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, intersect.second));
1471                         )
1472                         ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, intersect.first));
1473                         )
1474                         ifDebugInfiniteRecursion(Q_ASSERT(bucketForIndex(intersect.first)->nextBucketForHash(hash) ==
1475                                                           intersect.second); )
1476 
1477                         ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(useBucket, hash, intersect.second)); )
1478                         ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(useBucket, hash, intersect.first)); )
1479 
1480                         bucketForIndex(intersect.first)->setNextBucketForHash(hash, useBucket);
1481 
1482                         ENSURE_REACHABLE(useBucket);
1483                         ENSURE_REACHABLE(intersect.second);
1484 
1485                         ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, useBucket)); )
1486                         ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, intersect.second));
1487                         )
1488                     } else {
1489                         //State: intersect.first == 0 && intersect.second != 0. This means that whole complete
1490                         //chain opened by *bucketHashPosition with the hash-value is also following useBucket,
1491                         //so useBucket can just be inserted at the top
1492 
1493                         ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(*bucketHashPosition, hash, useBucket)); )
1494                         ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(useBucket, hash, *bucketHashPosition)); )
1495                         unsigned short oldStart = *bucketHashPosition;
1496 
1497                         *bucketHashPosition = useBucket;
1498 
1499                         ENSURE_REACHABLE(useBucket);
1500                         ENSURE_REACHABLE(oldStart);
1501                         Q_UNUSED(oldStart);
1502                     }
1503                 }
1504 
1505                 const auto reOrderFreeSpaceBucketIndex = m_freeSpaceBuckets.indexOf(useBucket);
1506                 if (reOrderFreeSpaceBucketIndex != -1)
1507                     updateFreeSpaceOrder(reOrderFreeSpaceBucketIndex);
1508 
1509                 Q_ASSERT(m_currentBucket < m_buckets.size());
1510                 return createIndex(useBucket, indexInBucket);
1511             } else {
1512                 advanceToNextBucket();
1513             }
1514         }
1515         //We haven't found a bucket that already contains the item, so append it to the current bucket
1516 
1517         qWarning() << "Found no bucket for an item in" << m_repositoryName;
1518         return 0;
1519     }
1520 
1521     ///Returns zero if the item is not in the repository yet
1522     unsigned int findIndex(const ItemRequest& request) const
1523     {
1524         return walkBucketChain(request.hash(), [this, &request](ushort bucketIdx, const MyBucket* bucketPtr) {
1525                 const ushort indexInBucket = bucketPtr->findIndex(request);
1526                 return indexInBucket ? createIndex(bucketIdx, indexInBucket) : 0u;
1527             });
1528     }
1529 
1530     /// Returns nullptr if the item is not in the repository yet
1531     const Item* findItem(const ItemRequest& request) const
1532     {
1533         auto index = findIndex(request);
1534         if (!index) {
1535             return nullptr;
1536         }
1537         return itemFromIndex(index);
1538     }
1539 
1540     ///Deletes the item from the repository.
1541     void deleteItem(unsigned int index)
1542     {
1543         verifyIndex(index);
1544 
1545         m_metaDataChanged = true;
1546 
1547         const uint hash = itemFromIndex(index)->hash();
1548         const ushort bucket = (index >> 16);
1549 
1550         //Apart from removing the item itself, we may have to recreate the nextBucketForHash link, so we need the previous bucket
1551         MyBucket* previousBucketPtr = nullptr;
1552         MyBucket* const bucketPtr = walkBucketChain(hash,
1553                                                     [bucket, &previousBucketPtr](ushort bucketIdx,
1554                                                                                  MyBucket* bucketPtr) -> MyBucket* {
1555                 if (bucket != bucketIdx) {
1556                     previousBucketPtr = bucketPtr;
1557                     return nullptr;
1558                 }
1559                 return bucketPtr; // found bucket, stop looking
1560             });
1561 
1562         //Make sure the index was reachable through the hash chain
1563         Q_ASSERT(bucketPtr);
1564 
1565         --m_statItemCount;
1566 
1567         bucketPtr->deleteItem(index, hash, *this);
1568 
1569         /**
1570          * Now check whether the link root/previousBucketNumber -> bucket is still needed.
1571          */
1572 
1573         if (!previousBucketPtr) {
1574             // This bucket is linked in the m_firstBucketForHash array, find the next clashing bucket in the chain
1575             // There may be items in the chain that clash only with MyBucket::NextBucketHashSize, skipped here
1576             m_firstBucketForHash[hash %
1577                                  bucketHashSize] = walkBucketChain(hash, [hash](ushort bucketIdx, MyBucket* bucketPtr) {
1578                     if (bucketPtr->hasClashingItem(hash, bucketHashSize)) {
1579                         return bucketIdx;
1580                     }
1581                     return static_cast<ushort>(0);
1582                 });
1583         } else if (!bucketPtr->hasClashingItem(hash, MyBucket::NextBucketHashSize)) {
1584             // TODO: Skip clashing items reachable from m_firstBucketForHash
1585             // (see note in usePermissiveModuloWhenRemovingClashLinks() test)
1586 
1587             ENSURE_REACHABLE(bucket);
1588 
1589             previousBucketPtr->setNextBucketForHash(hash, bucketPtr->nextBucketForHash(hash));
1590 
1591             Q_ASSERT(m_buckets[bucketPtr->nextBucketForHash(hash)] != previousBucketPtr);
1592         }
1593 
1594         ENSURE_REACHABLE(bucket);
1595 
1596         if (bucketPtr->monsterBucketExtent()) {
1597             //Convert the monster-bucket back to multiple normal buckets, and put them into the free list
1598             Q_ASSERT(bucketPtr->isEmpty());
1599             if (!previousBucketPtr) {
1600                 // see https://bugs.kde.org/show_bug.cgi?id=272408
1601                 // the monster bucket will be deleted and new smaller ones created
1602                 // the next bucket for this hash is invalid anyways as done above
1603                 // but calling the below unconditionally leads to other issues...
1604                 bucketPtr->setNextBucketForHash(hash, 0);
1605             }
1606             convertMonsterBucket(bucket, 0);
1607         } else {
1608             putIntoFreeList(bucket, bucketPtr);
1609         }
1610 
1611         Q_ASSERT(m_currentBucket);
1612         Q_ASSERT(m_currentBucket < m_buckets.size());
1613     }
1614 
1615     using MyDynamicItem = DynamicItem<Item, markForReferenceCounting>;
1616 
1617     ///This returns an editable version of the item. @warning: Never change an entry that affects the hash,
1618     ///or the equals(..) function. That would completely destroy the consistency.
1619     ///@param index The index. It must be valid(match an existing item), and nonzero.
1620     ///@warning If you use this, make sure you lock mutex() before calling,
1621     ///         and hold it until you're ready using/changing the data..
1622     MyDynamicItem dynamicItemFromIndex(unsigned int index)
1623     {
1624         verifyIndex(index);
1625 
1626         unsigned short bucket = (index >> 16);
1627 
1628         auto* bucketPtr = bucketForIndex(bucket);
1629         bucketPtr->prepareChange();
1630         unsigned short indexInBucket = index & 0xffff;
1631         return MyDynamicItem(const_cast<Item*>(bucketPtr->itemFromIndex(indexInBucket)),
1632                              bucketPtr->data(), bucketPtr->dataSize());
1633     }
1634 
1635     ///This returns an editable version of the item. @warning: Never change an entry that affects the hash,
1636     ///or the equals(..) function. That would completely destroy the consistency.
1637     ///@param index The index. It must be valid(match an existing item), and nonzero.
1638     ///@warning If you use this, make sure you lock mutex() before calling,
1639     ///         and hold it until you're ready using/changing the data..
1640     ///@warning If you change contained complex items that depend on reference-counting, you
1641     ///         must use dynamicItemFromIndex(..) instead of dynamicItemFromIndexSimple(..)
1642     Item* dynamicItemFromIndexSimple(unsigned int index)
1643     {
1644         verifyIndex(index);
1645 
1646         unsigned short bucket = (index >> 16);
1647 
1648         auto* bucketPtr = bucketForIndex(bucket);
1649         bucketPtr->prepareChange();
1650         unsigned short indexInBucket = index & 0xffff;
1651         return const_cast<Item*>(bucketPtr->itemFromIndex(indexInBucket));
1652     }
1653 
1654     ///@param index The index. It must be valid(match an existing item), and nonzero.
1655     const Item* itemFromIndex(unsigned int index) const
1656     {
1657         verifyIndex(index);
1658 
1659         unsigned short bucket = (index >> 16);
1660 
1661         const auto* bucketPtr = bucketForIndex(bucket);
1662         unsigned short indexInBucket = index & 0xffff;
1663         Q_ASSERT(m_currentBucket);
1664         Q_ASSERT(m_currentBucket < m_buckets.size());
1665         return bucketPtr->itemFromIndex(indexInBucket);
1666     }
1667 
1668     QString printStatistics() const final
1669     {
1670         return statistics().print();
1671     }
1672 
1673     ItemRepositoryStatistics statistics() const
1674     {
1675         Q_ASSERT(!m_currentBucket || m_currentBucket < m_buckets.size());
1676         ItemRepositoryStatistics ret;
1677         uint loadedBuckets = 0;
1678         for (auto* bucket : m_buckets) {
1679             if (bucket) {
1680                 ++loadedBuckets;
1681             }
1682         }
1683 
1684 #ifdef DEBUG_MONSTERBUCKETS
1685         for (int a = 0; a < m_freeSpaceBuckets.size(); ++a) {
1686             if (a > 0) {
1687                 uint prev = a - 1;
1688                 uint prevLargestFree = bucketForIndex(m_freeSpaceBuckets[prev])->largestFreeSize();
1689                 uint largestFree = bucketForIndex(m_freeSpaceBuckets[a])->largestFreeSize();
1690                 Q_ASSERT((prevLargestFree < largestFree) || (prevLargestFree == largestFree &&
1691                                                              m_freeSpaceBuckets[prev] < m_freeSpaceBuckets[a]));
1692             }
1693         }
1694 
1695 #endif
1696         ret.hashSize = bucketHashSize;
1697         ret.hashUse = 0;
1698         for (uint a = 0; a < bucketHashSize; ++a)
1699             if (m_firstBucketForHash[a])
1700                 ++ret.hashUse;
1701 
1702         ret.emptyBuckets = 0;
1703 
1704         uint loadedMonsterBuckets = 0;
1705         for (auto* bucket : m_buckets) {
1706             if (bucket && bucket->monsterBucketExtent()) {
1707                 loadedMonsterBuckets += bucket->monsterBucketExtent() + 1;
1708             }
1709         }
1710 
1711         uint usedBucketSpace = MyBucket::DataSize* m_currentBucket;
1712         uint freeBucketSpace = 0, freeUnreachableSpace = 0;
1713         uint lostSpace = 0; //Should be zero, else something is wrong
1714         uint totalInBucketHashSize = 0, totalInBucketUsedSlotCount = 0, totalInBucketChainLengths = 0;
1715         uint bucketCount = 0;
1716 
1717         ret.totalBucketFollowerSlots = 0;
1718         ret.averageNextBucketForHashSequenceLength = 0;
1719         ret.longestNextBucketChain = 0;
1720         ret.longestInBucketChain = 0;
1721 
1722         for (int a = 1; a < m_currentBucket + 1; ++a) {
1723             Q_ASSERT(!m_monsterBucketTailMarker[a]);
1724 
1725             MyBucket* bucket = bucketForIndex(a);
1726             if (bucket) {
1727                 ++bucketCount;
1728 
1729                 bucket->countFollowerIndexLengths(totalInBucketUsedSlotCount, totalInBucketChainLengths,
1730                                                   totalInBucketHashSize, ret.longestInBucketChain);
1731 
1732                 for (uint aa = 0; aa < MyBucket::NextBucketHashSize; ++aa) {
1733                     uint length = 0;
1734                     uint next = bucket->nextBucketForHash(aa);
1735                     if (next) {
1736                         ++ret.totalBucketFollowerSlots;
1737                         while (next) {
1738                             ++length;
1739                             ++ret.averageNextBucketForHashSequenceLength;
1740                             next = bucketForIndex(next)->nextBucketForHash(aa);
1741                         }
1742                     }
1743                     if (length > ret.longestNextBucketChain) {
1744 //             qDebug() << "next-bucket-chain in" << a << aa << ":" << length;
1745                         ret.longestNextBucketChain = length;
1746                     }
1747                 }
1748 
1749                 uint bucketFreeSpace = bucket->totalFreeItemsSize() + bucket->available();
1750                 freeBucketSpace += bucketFreeSpace;
1751                 if (m_freeSpaceBuckets.indexOf(a) == -1)
1752                     freeUnreachableSpace += bucketFreeSpace;
1753 
1754                 if (bucket->isEmpty()) {
1755                     ++ret.emptyBuckets;
1756                     Q_ASSERT(!bucket->monsterBucketExtent());
1757 #ifdef DEBUG_MONSTERBUCKETS
1758                     Q_ASSERT(m_freeSpaceBuckets.contains(a));
1759 #endif
1760                 }
1761 
1762                 lostSpace += bucket->lostSpace();
1763                 a += bucket->monsterBucketExtent();
1764             }
1765         }
1766 
1767         if (ret.totalBucketFollowerSlots)
1768             ret.averageNextBucketForHashSequenceLength /= ret.totalBucketFollowerSlots;
1769 
1770         ret.loadedBuckets = loadedBuckets;
1771         ret.currentBucket = m_currentBucket;
1772         ret.usedMemory = usedMemory();
1773         ret.loadedMonsterBuckets = loadedMonsterBuckets;
1774 
1775         ret.hashClashedItems = m_statBucketHashClashes;
1776         ret.totalItems = m_statItemCount;
1777         ret.usedSpaceForBuckets = usedBucketSpace;
1778         ret.freeSpaceInBuckets = freeBucketSpace;
1779         ret.lostSpace = lostSpace;
1780 
1781         ret.freeUnreachableSpace = freeUnreachableSpace;
1782         ret.averageInBucketHashSize = bucketCount ? (totalInBucketHashSize / bucketCount) : 0;
1783         ret.averageInBucketUsedSlotCount = bucketCount ? (totalInBucketUsedSlotCount / bucketCount) : 0;
1784         ret.averageInBucketSlotChainLength = float( totalInBucketChainLengths ) / totalInBucketUsedSlotCount;
1785 
1786         //If m_statBucketHashClashes is high, the bucket-hash needs to be bigger
1787         return ret;
1788     }
1789 
1790     ///This can be used to safely iterate through all items in the repository
1791     ///@param visitor Should be an instance of a class that has a bool operator()(const Item*) member function,
1792     ///               that returns whether more items are wanted.
1793     ///@param onlyInMemory If this is true, only items are visited that are currently in memory.
1794     template <class Visitor>
1795     void visitAllItems(Visitor& visitor, bool onlyInMemory = false) const
1796     {
1797         for (int a = 1; a <= m_currentBucket; ++a) {
1798             if (!onlyInMemory || m_buckets.at(a)) {
1799                 auto bucket = bucketForIndex(a);
1800                 if (bucket && !bucket->visitAllItems(visitor))
1801                     return;
1802             }
1803         }
1804     }
1805 
1806     QString repositoryName() const final { return m_repositoryName; }
1807 
1808     Mutex* mutex() const { return m_mutex; }
1809 
1810     void lock() final { m_mutex->lock(); }
1811     void unlock() final { m_mutex->unlock(); }
1812 
1813 private:
1814     void writeMetadata()
1815     {
1816         using namespace ItemRepositoryUtils;
1817 
1818         Q_ASSERT(m_file);
1819         Q_ASSERT(m_dynamicFile);
1820 
1821         m_file->seek(0);
1822         writeValue(m_file, m_repositoryVersion);
1823         uint hashSize = bucketHashSize;
1824         writeValue(m_file, hashSize);
1825         uint itemRepositoryVersion = staticItemRepositoryVersion();
1826         writeValue(m_file, itemRepositoryVersion);
1827         writeValue(m_file, m_statBucketHashClashes);
1828         writeValue(m_file, m_statItemCount);
1829 
1830         const uint bucketCount = static_cast<uint>(m_buckets.size());
1831         writeValue(m_file, bucketCount);
1832         writeValue(m_file, m_currentBucket);
1833         writeValues(m_file, bucketHashSize, m_firstBucketForHash);
1834         Q_ASSERT(m_file->pos() == BucketStartOffset);
1835 
1836         m_dynamicFile->seek(0);
1837         writeValue(m_dynamicFile, static_cast<uint>(m_freeSpaceBuckets.size()));
1838         writeList(m_dynamicFile, m_freeSpaceBuckets);
1839 
1840         Q_ASSERT(m_buckets.size() == m_monsterBucketTailMarker.size());
1841         writeList(m_dynamicFile, m_monsterBucketTailMarker);
1842     }
1843 
1844     ///Synchronizes the state on disk to the one in memory, and does some memory-management.
1845     ///Should be called on a regular basis. Can be called centrally from the global item repository registry.
1846     void store() final
1847     {
1848         if (m_file) {
1849             if (!m_file->open(QFile::ReadWrite) || !m_dynamicFile->open(QFile::ReadWrite)) {
1850                 qFatal("cannot re-open repository file for storing");
1851                 return;
1852             }
1853 
1854             for (int a = 0; a < m_buckets.size(); ++a) {
1855                 auto& bucket = m_buckets[a];
1856                 if (bucket) {
1857                     if (bucket->changed()) {
1858                         storeBucket(a);
1859                     }
1860                     if (m_unloadingEnabled) {
1861                         const int unloadAfterTicks = 2;
1862                         if (bucket->lastUsed() > unloadAfterTicks) {
1863                             delete bucket;
1864                             bucket = nullptr;
1865                         } else {
1866                             bucket->tick();
1867                         }
1868                     }
1869                 }
1870             }
1871 
1872             if (m_metaDataChanged) {
1873                 writeMetadata();
1874             }
1875             //To protect us from inconsistency due to crashes. flush() is not enough. We need to close.
1876             m_file->close();
1877             m_dynamicFile->close();
1878             Q_ASSERT(!m_file->isOpen());
1879             Q_ASSERT(!m_dynamicFile->isOpen());
1880         }
1881     }
1882 
1883     bool open(const QString& path) final
1884     {
1885         using namespace ItemRepositoryUtils;
1886 
1887         close();
1888         // qDebug() << "opening repository" << m_repositoryName << "at" << path;
1889         QDir dir(path);
1890         m_file = new QFile(dir.absoluteFilePath(m_repositoryName));
1891         m_dynamicFile = new QFile(dir.absoluteFilePath(m_repositoryName + QLatin1String("_dynamic")));
1892         if (!m_file->open(QFile::ReadWrite) || !m_dynamicFile->open(QFile::ReadWrite)) {
1893             delete m_file;
1894             m_file = nullptr;
1895             delete m_dynamicFile;
1896             m_dynamicFile = nullptr;
1897             return false;
1898         }
1899 
1900         m_metaDataChanged = true;
1901         if (m_file->size() == 0) {
1902             m_statBucketHashClashes = m_statItemCount = 0;
1903 
1904             Q_ASSERT(m_buckets.isEmpty());
1905             Q_ASSERT(m_freeSpaceBuckets.isEmpty());
1906             allocateNextBuckets(ItemRepositoryBucketLinearGrowthFactor);
1907 
1908             std::fill_n(m_firstBucketForHash, bucketHashSize, 0);
1909 
1910             // Skip the first bucket, we won't use it so we have the zero indices for special purposes
1911             Q_ASSERT(m_currentBucket == 1);
1912 
1913             // -1 because the 0 bucket is reserved for special purposes
1914             Q_ASSERT(m_freeSpaceBuckets.size() == m_buckets.size() - 1);
1915 
1916             writeMetadata();
1917 
1918             // We have completely initialized the file now
1919             if (m_file->pos() != BucketStartOffset) {
1920                 KMessageBox::error(nullptr,
1921                                    i18n("Failed writing to %1, probably the disk is full", m_file->fileName()));
1922                 abort();
1923             }
1924 
1925         } else {
1926             m_file->close();
1927             bool res = m_file->open(QFile::ReadOnly); // Re-open in read-only mode, so we create a read-only m_fileMap
1928             VERIFY(res);
1929             // Check that the version is correct
1930             uint storedVersion = 0, hashSize = 0, itemRepositoryVersion = 0;
1931 
1932             readValue(m_file, &storedVersion);
1933             readValue(m_file, &hashSize);
1934             readValue(m_file, &itemRepositoryVersion);
1935             readValue(m_file, &m_statBucketHashClashes);
1936             readValue(m_file, &m_statItemCount);
1937 
1938             if (storedVersion != m_repositoryVersion || hashSize != bucketHashSize
1939                 || itemRepositoryVersion != staticItemRepositoryVersion()) {
1940                 qDebug() << "repository" << m_repositoryName << "version mismatch in" << m_file->fileName()
1941                          << ", stored: version " << storedVersion << "hashsize" << hashSize << "repository-version"
1942                          << itemRepositoryVersion << " current: version" << m_repositoryVersion << "hashsize"
1943                          << bucketHashSize << "repository-version" << staticItemRepositoryVersion();
1944                 delete m_file;
1945                 m_file = nullptr;
1946                 delete m_dynamicFile;
1947                 m_dynamicFile = nullptr;
1948                 return false;
1949             }
1950             m_metaDataChanged = false;
1951 
1952             uint bucketCount = 0;
1953             readValue(m_file, &bucketCount);
1954 
1955             Q_ASSERT(m_buckets.isEmpty());
1956             Q_ASSERT(m_freeSpaceBuckets.isEmpty());
1957             // not calling allocateNextBuckets here, as we load buckets from file here, not new/next ones
1958             m_buckets.resize(bucketCount);
1959 
1960             readValue(m_file, &m_currentBucket);
1961             Q_ASSERT(m_currentBucket);
1962 
1963             readValues(m_file, bucketHashSize, m_firstBucketForHash);
1964 
1965             Q_ASSERT(m_file->pos() == BucketStartOffset);
1966 
1967             uint freeSpaceBucketsSize = 0;
1968             readValue(m_dynamicFile, &freeSpaceBucketsSize);
1969             m_freeSpaceBuckets.resize(freeSpaceBucketsSize);
1970             readList(m_dynamicFile, &m_freeSpaceBuckets);
1971 
1972             m_monsterBucketTailMarker.resize(bucketCount);
1973             readList(m_dynamicFile, &m_monsterBucketTailMarker);
1974         }
1975 
1976         m_fileMapSize = 0;
1977         m_fileMap = nullptr;
1978 
1979 #ifdef ITEMREPOSITORY_USE_MMAP_LOADING
1980         if (m_file->size() > BucketStartOffset) {
1981             m_fileMap = m_file->map(BucketStartOffset, m_file->size() - BucketStartOffset);
1982             Q_ASSERT(m_file->isOpen());
1983             Q_ASSERT(m_file->size() >= BucketStartOffset);
1984             if (m_fileMap) {
1985                 m_fileMapSize = m_file->size() - BucketStartOffset;
1986             } else {
1987                 qWarning() << "mapping" << m_file->fileName() << "FAILED!";
1988             }
1989         }
1990 #endif
1991         // To protect us from inconsistency due to crashes. flush() is not enough.
1992         m_file->close();
1993         m_dynamicFile->close();
1994 
1995         return true;
1996     }
1997 
1998     ///@warning by default, this does not store the current state to disk.
1999     void close(bool doStore = false) final
2000     {
2001         if (doStore)
2002             store();
2003 
2004         if (m_file)
2005             m_file->close();
2006         delete m_file;
2007         m_file = nullptr;
2008         m_fileMap = nullptr;
2009         m_fileMapSize = 0;
2010 
2011         if (m_dynamicFile)
2012             m_dynamicFile->close();
2013         delete m_dynamicFile;
2014         m_dynamicFile = nullptr;
2015 
2016         qDeleteAll(m_buckets);
2017         m_buckets.clear();
2018 
2019         std::fill_n(m_firstBucketForHash, bucketHashSize, 0);
2020     }
2021 
2022     int finalCleanup() final
2023     {
2024         int changed = 0;
2025         for (int a = 1; a <= m_currentBucket; ++a) {
2026             MyBucket* bucket = bucketForIndex(a);
2027             if (bucket && bucket->dirty()) { ///@todo Faster dirty check, without loading bucket
2028                 changed += bucket->finalCleanup(*this);
2029             }
2030             a += bucket->monsterBucketExtent(); // Skip buckets that are attached as tail to monster-buckets
2031         }
2032 
2033         return changed;
2034     }
2035 
2036     uint usedMemory() const
2037     {
2038         uint used = 0;
2039         for (auto* bucket : m_buckets) {
2040             if (bucket) {
2041                 used += bucket->usedMemory();
2042             }
2043         }
2044 
2045         return used;
2046     }
2047 
2048     uint createIndex(ushort bucketIndex, ushort indexInBucket) const
2049     {
2050         //Combine the index in the bucket, and the bucket number into one index
2051         const uint index = (bucketIndex << 16) + indexInBucket;
2052         verifyIndex(index);
2053         return index;
2054     }
2055 
2056     /**
2057      * Walks through all buckets clashing with @p hash
2058      *
2059      * Will return the value returned by the lambda, returning early if truthy
2060      */
2061     template <typename Visitor>
2062     auto walkBucketChain(unsigned int hash, const Visitor& visitor) const->decltype(visitor(0, nullptr))
2063     {
2064         unsigned short bucketIndex = m_firstBucketForHash[hash % bucketHashSize];
2065 
2066         while (bucketIndex) {
2067             auto* bucketPtr = bucketForIndex(bucketIndex);
2068 
2069             if (auto visitResult = visitor(bucketIndex, bucketPtr)) {
2070                 return visitResult;
2071             }
2072 
2073             bucketIndex = bucketPtr->nextBucketForHash(hash);
2074         }
2075 
2076         return {}; // clazy:exclude=returning-void-expression
2077     }
2078 
2079     ///Makes sure the order within m_freeSpaceBuckets is correct, after largestFreeSize has been changed for m_freeSpaceBuckets[index].
2080     ///If too few space is free within the given bucket, it is removed from m_freeSpaceBuckets.
2081     void updateFreeSpaceOrder(uint index)
2082     {
2083         m_metaDataChanged = true;
2084 
2085         unsigned int* freeSpaceBuckets = m_freeSpaceBuckets.data();
2086 
2087         Q_ASSERT(index < static_cast<uint>(m_freeSpaceBuckets.size()));
2088         MyBucket* bucketPtr = bucketForIndex(freeSpaceBuckets[index]);
2089 
2090         unsigned short largestFreeSize = bucketPtr->largestFreeSize();
2091 
2092         if (largestFreeSize == 0 ||
2093             (bucketPtr->freeItemCount() <= MyBucket::MaxFreeItemsForHide &&
2094              largestFreeSize <= MyBucket::MaxFreeSizeForHide)) {
2095             //Remove the item from freeSpaceBuckets
2096             m_freeSpaceBuckets.remove(index);
2097         } else {
2098             while (1) {
2099                 int prev = index - 1;
2100                 int next = index + 1;
2101                 if (prev >= 0 && (bucketForIndex(freeSpaceBuckets[prev])->largestFreeSize() > largestFreeSize ||
2102                                   (bucketForIndex(freeSpaceBuckets[prev])->largestFreeSize() == largestFreeSize &&
2103                                    freeSpaceBuckets[index] < freeSpaceBuckets[prev]))
2104                 ) {
2105                     //This item should be behind the successor, either because it has a lower largestFreeSize, or because the index is lower
2106                     uint oldPrevValue = freeSpaceBuckets[prev];
2107                     freeSpaceBuckets[prev] = freeSpaceBuckets[index];
2108                     freeSpaceBuckets[index] = oldPrevValue;
2109                     index = prev;
2110                 } else if (next < m_freeSpaceBuckets.size()
2111                            && (bucketForIndex(freeSpaceBuckets[next])->largestFreeSize() < largestFreeSize
2112                                || (bucketForIndex(freeSpaceBuckets[next])->largestFreeSize() == largestFreeSize
2113                                    && freeSpaceBuckets[index] > freeSpaceBuckets[next]))) {
2114                     //This item should be behind the successor, either because it has a higher largestFreeSize, or because the index is higher
2115                     uint oldNextValue = freeSpaceBuckets[next];
2116                     freeSpaceBuckets[next] = freeSpaceBuckets[index];
2117                     freeSpaceBuckets[index] = oldNextValue;
2118                     index = next;
2119                 } else {
2120                     break;
2121                 }
2122             }
2123         }
2124     }
2125 
2126     ///Does conversion from monster-bucket to normal bucket and from normal bucket to monster-bucket
2127     ///The bucket @param bucketNumber must already be loaded and empty. the "extent" buckets behind must also be loaded,
2128     ///and also be empty.
2129     ///The created buckets are not registered anywhere. When converting from monster-bucket to normal bucket,
2130     ///oldExtent+1 normal buckets are created, that must be registered somewhere.
2131     ///@warning During conversion, all the touched buckets are deleted and re-created
2132     ///@param extent When this is zero, the bucket is converted from monster-bucket to normal bucket.
2133     ///              When it is nonzero, it is converted to a monster-bucket.
2134     MyBucket* convertMonsterBucket(int bucketNumber, int extent)
2135     {
2136         m_metaDataChanged = true;
2137 
2138         Q_ASSERT(bucketNumber);
2139         auto* bucketPtr = bucketForIndex(bucketNumber);
2140 
2141         // the bucket may have encountered hash clashes in the past, we need to keep that data alive
2142         // note that all following buckets that got merged with the first bucket to create the monster
2143         // are guaranteed to _not_ have any next buckets, which is asserted in `deleteBucket`
2144         auto oldNextBucketHash = bucketPtr->takeNextBucketHash();
2145 
2146         if (extent) {
2147             //Convert to monster-bucket
2148             Q_ASSERT(bucketPtr->isEmpty());
2149 
2150             const auto monsterStart = bucketNumber;
2151             const auto monsterEnd = monsterStart + extent + 1;
2152 
2153             // NOTE: see assertion below, when we get here then the entry for `bucketNumber in `m_freeSpaceBuckets`
2154             //       will be followed by the entries for the following buckets, because they are all free and thus
2155             //       the second level ordering by bucket index is all that matters
2156             const auto freeSpaceIndex = m_freeSpaceBuckets.indexOf(bucketNumber);
2157             Q_ASSERT(freeSpaceIndex != -1);
2158 
2159 #ifdef DEBUG_MONSTERBUCKETS
2160             for (int offset = 0; offset < extent + 1; ++offset) {
2161                 auto bucket = bucketForIndex(bucketNumber + offset);
2162                 Q_ASSERT(bucket->isEmpty());
2163                 Q_ASSERT(!bucket->monsterBucketExtent());
2164                 Q_ASSERT(!m_monsterBucketTailMarker[bucketNumber + offset]);
2165                 // verify that the m_freeSpaceBuckets is ordered the way we expect it to
2166                 Q_ASSERT(m_freeSpaceBuckets[freeSpaceIndex + offset] == static_cast<uint>(bucketNumber + offset));
2167             }
2168 #endif
2169 
2170             // the following buckets are not free anymore, they get merged into the monster
2171             // NOTE: we assert above that the order of the entries is correct
2172             m_freeSpaceBuckets.remove(freeSpaceIndex, extent + 1);
2173 
2174             for (int index = monsterStart; index < monsterEnd; ++index)
2175                 deleteBucket(index);
2176 
2177             bucketPtr = new MyBucket();
2178             bucketPtr->initialize(extent, std::move(oldNextBucketHash));
2179 
2180             Q_ASSERT(!m_buckets[bucketNumber]);
2181             m_buckets[bucketNumber] = bucketPtr;
2182 
2183             // mark the tail of the monster bucket
2184             std::fill(std::next(m_monsterBucketTailMarker.begin(), monsterStart + 1),
2185                       std::next(m_monsterBucketTailMarker.begin(), monsterEnd), true);
2186 
2187 #ifdef DEBUG_MONSTERBUCKETS
2188             // all following buckets are deleted and not marked as free anymore
2189             for (int index = monsterStart + 1; index < monsterEnd; ++index) {
2190                 Q_ASSERT(!m_buckets[index]);
2191                 Q_ASSERT(!m_freeSpaceBuckets.contains(index));
2192 
2193                 // all tail buckets are marked as part of the monster now
2194                 Q_ASSERT(m_monsterBucketTailMarker[index]);
2195             }
2196 #endif
2197 
2198             // but the new monster bucket is still free
2199             Q_ASSERT(bucketPtr->isEmpty());
2200             // and it itself is not marked as a tail, to allow us to still look things up in it directly
2201             Q_ASSERT(!m_monsterBucketTailMarker[bucketNumber]);
2202             // monster buckets don't get put into the m_freeSpaceBuckets list
2203             Q_ASSERT(!m_freeSpaceBuckets.contains(bucketNumber));
2204         } else {
2205             const auto oldExtent = bucketPtr->monsterBucketExtent();
2206             const auto oldMonsterStart = bucketNumber;
2207             const auto oldMonsterEnd = oldMonsterStart + oldExtent + 1;
2208 
2209             Q_ASSERT(bucketPtr->monsterBucketExtent());
2210             Q_ASSERT(bucketPtr->isEmpty());
2211             // while empty, a monster bucket never is put into the m_freeSpaceBuckets list
2212             Q_ASSERT(!m_freeSpaceBuckets.contains(bucketNumber));
2213             // the monster itself is not a tail
2214             Q_ASSERT(!m_monsterBucketTailMarker[bucketNumber]);
2215 
2216 #ifdef DEBUG_MONSTERBUCKETS
2217             // all buckets that are part of the monster are marked as such
2218             for (int index = oldMonsterStart + 1; index < oldMonsterEnd; ++index) {
2219                 Q_ASSERT(m_monsterBucketTailMarker[index]);
2220             }
2221 #endif
2222 
2223             // delete the monster bucket
2224             deleteBucket(bucketNumber);
2225 
2226             // remove markers for the tail of the monster bucket
2227             std::fill(std::next(m_monsterBucketTailMarker.begin(), oldMonsterStart + 1),
2228                       std::next(m_monsterBucketTailMarker.begin(), oldMonsterEnd), false);
2229 
2230             // recreate non-monster buckets
2231             for (int index = oldMonsterStart; index < oldMonsterEnd; ++index) {
2232                 auto& bucket = m_buckets[index];
2233                 Q_ASSERT(!bucket);
2234 
2235                 bucket = new MyBucket();
2236 
2237                 if (index == oldMonsterStart) {
2238                     bucket->initialize(0, std::move(oldNextBucketHash));
2239                     bucketPtr = bucket;
2240                 } else {
2241                     bucket->initialize(0);
2242                 }
2243 
2244                 Q_ASSERT(!bucket->monsterBucketExtent());
2245                 Q_ASSERT(!m_freeSpaceBuckets.contains(index));
2246 
2247                 putIntoFreeList(index, bucket);
2248 
2249                 Q_ASSERT(m_freeSpaceBuckets.contains(index));
2250                 Q_ASSERT(!m_monsterBucketTailMarker[index]);
2251             }
2252         }
2253 
2254         Q_ASSERT(bucketPtr == m_buckets[bucketNumber]);
2255         return bucketPtr;
2256     }
2257 
2258     MyBucket* bucketForIndex(short unsigned int index) const
2259     {
2260         Q_ASSERT(index);
2261         MyBucket* bucketPtr = m_buckets.at(index);
2262         if (!bucketPtr) {
2263             bucketPtr = initializeBucket(index);
2264         }
2265         return bucketPtr;
2266     }
2267 
2268     struct AllItemsReachableVisitor {
2269         explicit AllItemsReachableVisitor(ItemRepository* rep)
2270             : repository(rep)
2271         {
2272         }
2273 
2274         bool operator()(const Item* item) { return repository->itemReachable(item); }
2275 
2276         ItemRepository* repository;
2277     };
2278 
2279     // Returns whether the given item is reachable through its hash
2280     bool itemReachable(const Item* item) const
2281     {
2282         const uint hash = item->hash();
2283 
2284         return walkBucketChain(hash, [=](ushort /*bucketIndex*/, const MyBucket* bucketPtr) {
2285             return bucketPtr->itemReachable(item, hash);
2286         });
2287     }
2288 
2289     // Returns true if all items in the given bucket are reachable through their hashes
2290     bool allItemsReachable(unsigned short bucket)
2291     {
2292         if (!bucket)
2293             return true;
2294 
2295         MyBucket* bucketPtr = bucketForIndex(bucket);
2296 
2297         AllItemsReachableVisitor visitor(this);
2298         return bucketPtr->visitAllItems(visitor);
2299     }
2300 
2301     inline MyBucket* initializeBucket(int bucketNumber) const
2302     {
2303         using namespace ItemRepositoryUtils;
2304 
2305         Q_ASSERT(bucketNumber);
2306 #ifdef DEBUG_MONSTERBUCKETS
2307         // ensure that the previous N buckets are no monster buckets that overlap the requested bucket
2308         for (int offset = 1; offset < 5; ++offset) {
2309             int test = bucketNumber - offset;
2310             if (test >= 0 && m_buckets[test]) {
2311                 Q_ASSERT(m_buckets[test]->monsterBucketExtent() < offset);
2312             }
2313         }
2314 
2315 #endif
2316 
2317         auto& bucket = m_buckets[bucketNumber];
2318         if (!bucket) {
2319             bucket = new MyBucket();
2320 
2321             bool doMMapLoading = ( bool )m_fileMap;
2322 
2323             uint offset = ((bucketNumber - 1) * MyBucket::DataSize);
2324             if (m_file && offset < m_fileMapSize && doMMapLoading &&
2325                 *reinterpret_cast<uint*>(m_fileMap + offset) == 0) {
2326 //         qDebug() << "loading bucket mmap:" << bucketNumber;
2327                 bucket->initializeFromMap(reinterpret_cast<char*>(m_fileMap + offset));
2328             } else if (m_file) {
2329                 //Either memory-mapping is disabled, or the item is not in the existing memory-map,
2330                 //so we have to load it the classical way.
2331                 bool res = m_file->open(QFile::ReadOnly);
2332 
2333                 if (offset + BucketStartOffset < m_file->size()) {
2334                     VERIFY(res);
2335                     offset += BucketStartOffset;
2336                     m_file->seek(offset);
2337                     int monsterBucketExtent;
2338                     readValue(m_file, &monsterBucketExtent);
2339                     m_file->seek(offset);
2340                     ///FIXME: use the data here instead of copying it again in prepareChange
2341                     QByteArray data = m_file->read((1 + monsterBucketExtent) * MyBucket::DataSize);
2342                     bucket->initializeFromMap(data.data());
2343                     bucket->prepareChange();
2344                 } else {
2345                     bucket->initialize(0);
2346                 }
2347 
2348                 m_file->close();
2349             } else {
2350                 bucket->initialize(0);
2351             }
2352         } else {
2353             bucket->initialize(0);
2354         }
2355 
2356         return bucket;
2357     }
2358 
2359     ///Can only be called on empty buckets
2360     void deleteBucket(int bucketNumber)
2361     {
2362         // NOTE: use bucketForIndex in the assertions here, as the bucket may not be initialized
2363         //       we still want to verify that we only delete empty buckets though
2364         Q_ASSERT(bucketForIndex(bucketNumber)->isEmpty());
2365         Q_ASSERT(bucketForIndex(bucketNumber)->noNextBuckets());
2366         Q_ASSERT(!m_freeSpaceBuckets.contains(bucketNumber));
2367 
2368         auto& bucket = m_buckets[bucketNumber];
2369         delete bucket;
2370         bucket = nullptr;
2371     }
2372 
2373     //m_file must be opened
2374     void storeBucket(int bucketNumber) const
2375     {
2376         if (!m_file) {
2377             return;
2378         }
2379         auto& bucket = m_buckets[bucketNumber];
2380         if (bucket) {
2381             bucket->store(m_file, BucketStartOffset + (bucketNumber - 1) * MyBucket::DataSize);
2382         }
2383     }
2384 
2385     /// If mustFindBucket is zero, the whole chain is just walked. This is good for debugging for infinite recursion.
2386     /// @return whether @p mustFindBucket was found
2387     bool walkBucketLinks(uint checkBucket, uint hash, uint mustFindBucket = 0) const
2388     {
2389         bool found = false;
2390         while (checkBucket) {
2391             if (checkBucket == mustFindBucket)
2392                 found = true;
2393 
2394             checkBucket = bucketForIndex(checkBucket)->nextBucketForHash(hash);
2395         }
2396         return found || (mustFindBucket == 0);
2397     }
2398 
2399     /// Computes the bucket where the chains opened by the buckets @p mainHead and @p intersectorHead
2400     /// with hash @p hash meet each other.
2401     /// @return <predecessor of first shared bucket in mainHead, first shared bucket>
2402     QPair<unsigned int, unsigned int> hashChainIntersection(uint mainHead, uint intersectorHead, uint hash) const
2403     {
2404         uint previous = 0;
2405         uint current = mainHead;
2406         while (current) {
2407             ///@todo Make this more efficient
2408             if (walkBucketLinks(intersectorHead, hash, current))
2409                 return qMakePair(previous, current);
2410 
2411             previous = current;
2412             current = bucketForIndex(current)->nextBucketForHash(hash);
2413         }
2414         return qMakePair(0u, 0u);
2415     }
2416 
2417     void putIntoFreeList(unsigned short bucket, MyBucket* bucketPtr)
2418     {
2419         Q_ASSERT(bucket);
2420 
2421         Q_ASSERT(!bucketPtr->monsterBucketExtent());
2422         int indexInFree = m_freeSpaceBuckets.indexOf(bucket);
2423 
2424         if (indexInFree == -1 &&
2425             (bucketPtr->freeItemCount() >= MyBucket::MinFreeItemsForReuse ||
2426              bucketPtr->largestFreeSize() >= MyBucket::MinFreeSizeForReuse)) {
2427             //Add the bucket to the list of buckets from where to re-assign free space
2428             //We only do it when a specific threshold of empty items is reached, because that way items can stay "somewhat" semantically ordered.
2429             Q_ASSERT(bucketPtr->largestFreeSize());
2430             int insertPos;
2431             for (insertPos = 0; insertPos < m_freeSpaceBuckets.size(); ++insertPos) {
2432                 if (bucketForIndex(m_freeSpaceBuckets[insertPos])->largestFreeSize() > bucketPtr->largestFreeSize())
2433                     break;
2434             }
2435 
2436             m_freeSpaceBuckets.insert(insertPos, bucket);
2437 
2438             updateFreeSpaceOrder(insertPos);
2439         } else if (indexInFree != -1) {
2440             ///Re-order so the order in m_freeSpaceBuckets is correct(sorted by largest free item size)
2441             updateFreeSpaceOrder(indexInFree);
2442         }
2443 
2444         // empty buckets definitely should be in the free space list
2445         Q_ASSERT(!bucketPtr->isEmpty() || m_freeSpaceBuckets.contains(bucket));
2446     }
2447 
2448     void verifyIndex(uint index) const
2449     {
2450         // We don't use zero indices
2451         Q_ASSERT(index);
2452         int bucket = (index >> 16);
2453         // nor zero buckets
2454         Q_ASSERT(bucket);
2455         Q_ASSERT_X(bucket < m_buckets.size(), Q_FUNC_INFO,
2456                    qPrintable(QStringLiteral("index %1 gives invalid bucket number %2, current count is: %3")
2457                               .arg(index)
2458                               .arg(bucket)
2459                               .arg(m_buckets.size())));
2460 
2461         // don't trigger compile warnings in release mode
2462         Q_UNUSED(bucket);
2463         Q_UNUSED(index);
2464     }
2465 
2466     void allocateNextBuckets(int numNewBuckets)
2467     {
2468         Q_ASSERT(numNewBuckets > 0);
2469         const auto oldSize = m_buckets.size();
2470         m_buckets.resize(oldSize + numNewBuckets);
2471         m_monsterBucketTailMarker.resize(m_buckets.size());
2472 
2473         // the buckets we just created can by definition not yet exist and must be empty
2474         // meaning we can bypass loading from file _and_ we must add them to the freeSpaceBuckets too
2475         for (int i = oldSize; i < (oldSize + numNewBuckets); ++i) {
2476             if (i == 0) // see below, the zero bucket is reserved for special purposes
2477                 continue;
2478 
2479             auto& bucket = m_buckets[i];
2480             Q_ASSERT(!bucket);
2481 
2482             bucket = new MyBucket;
2483             bucket->initialize(0);
2484 
2485             Q_ASSERT(bucket->isEmpty());
2486             Q_ASSERT(!bucket->monsterBucketExtent());
2487             Q_ASSERT(!m_monsterBucketTailMarker[i]);
2488 
2489             putIntoFreeList(i, bucket);
2490             Q_ASSERT(m_freeSpaceBuckets.contains(i));
2491         }
2492 
2493         // Skip the first bucket, we won't use it so we have the zero indices for special purposes
2494         if (m_currentBucket == 0)
2495             m_currentBucket = 1;
2496     }
2497 
2498     bool m_metaDataChanged = true;
2499     bool m_unloadingEnabled = true;
2500     // an unused, empty repo has no bucket yet
2501     // on first use this will then get incremented directly as we never use the zero-bucket index
2502     // that value is reserved for special purposes instead
2503     mutable int m_currentBucket = 0;
2504 
2505     //List of buckets that have free space available that can be assigned. Sorted by size: Smallest space first. Second order sorting: Bucket index
2506     QVector<uint> m_freeSpaceBuckets;
2507     // every bucket that is part of a monster bucket but not the "main" bucket gets marked in this list,
2508     // this allows us to ensure we don't try to put an entry into such a bucket later on, which would corrupt data
2509     QVector<bool> m_monsterBucketTailMarker;
2510     //List of hash map buckets that actually hold the data of the item repository
2511     mutable QVector<MyBucket*> m_buckets;
2512     uint m_statBucketHashClashes = 0;
2513     uint m_statItemCount = 0;
2514     //Maps hash-values modulo 1<<bucketHashSizeBits to the first bucket such a hash-value appears in
2515     short unsigned int m_firstBucketForHash[bucketHashSize] = { 0 };
2516 
2517     //File that contains the buckets
2518     QFile* m_file = nullptr;
2519     uchar* m_fileMap = nullptr;
2520     uint m_fileMapSize = 0;
2521     //File that contains more dynamic data, like the list of buckets with deleted items
2522     QFile* m_dynamicFile = nullptr;
2523 
2524     const QString m_repositoryName;
2525     const uint m_repositoryVersion;
2526     Mutex* const m_mutex;
2527     ItemRepositoryRegistry* const m_registry;
2528 
2529     friend class ::TestItemRepository;
2530     friend class ::BenchItemRepository;
2531 };
2532 
2533 template<typename Context>
2534 class ItemRepositoryFor;
2535 
2536 struct LockedItemRepository {
2537     template<typename Context, typename Op>
2538     static auto read(Op&& op)
2539     {
2540         const auto& repo = ItemRepositoryFor<Context>::repo();
2541 
2542         QMutexLocker lock(repo.mutex());
2543         return op(repo);
2544     }
2545 
2546     template<typename Context, typename Op>
2547     static auto write(Op&& op)
2548     {
2549         auto& repo = ItemRepositoryFor<Context>::repo();
2550 
2551         QMutexLocker lock(repo.mutex());
2552         return op(repo);
2553     }
2554 
2555     template<typename Context>
2556     static void initialize()
2557     {
2558         ItemRepositoryFor<Context>::repo();
2559     }
2560 };
2561 }
2562 
2563 #endif