File indexing completed on 2024-05-05 04:38:10
0001 /* 0002 SPDX-FileCopyrightText: 2008 David Nolden <david.nolden.kdevelop@art-master.de> 0003 0004 SPDX-License-Identifier: LGPL-2.0-only 0005 */ 0006 0007 #ifndef KDEVPLATFORM_ITEMREPOSITORY_H 0008 #define KDEVPLATFORM_ITEMREPOSITORY_H 0009 0010 #include <QDebug> 0011 #include <QDir> 0012 #include <QFile> 0013 #include <QMutex> 0014 #include <QMutexLocker> 0015 0016 #include <KMessageBox> 0017 #include <KLocalizedString> 0018 0019 #include <algorithm> 0020 #include <memory> 0021 #include <type_traits> 0022 0023 #include "referencecounting.h" 0024 #include "abstractitemrepository.h" 0025 #include "itemrepositoryregistry.h" 0026 0027 //#define DEBUG_MONSTERBUCKETS 0028 0029 // #define DEBUG_ITEMREPOSITORY_LOADING 0030 // #define ifDebugInfiniteRecursion(x) x 0031 #define ifDebugInfiniteRecursion(x) 0032 0033 // #define ifDebugLostSpace(x) x 0034 #define ifDebugLostSpace(x) 0035 // #define DEBUG_INCORRECT_DELETE 0036 0037 //Makes sure that all items stay reachable through the basic hash 0038 // #define DEBUG_ITEM_REACHABILITY 0039 0040 ///@todo Dynamic bucket hash size 0041 0042 #ifdef DEBUG_ITEM_REACHABILITY 0043 #define ENSURE_REACHABLE(bucket) Q_ASSERT(allItemsReachable(bucket)); 0044 #define IF_ENSURE_REACHABLE(x) x 0045 #else 0046 #define ENSURE_REACHABLE(bucket) 0047 #define IF_ENSURE_REACHABLE(x) 0048 #endif 0049 0050 #define ITEMREPOSITORY_USE_MMAP_LOADING 0051 0052 //Assertion macro that prevents warnings if debugging is disabled 0053 //Only use it to verify values, it should not call any functions, since else the function will even be called in release mode 0054 #ifdef QT_NO_DEBUG 0055 #define VERIFY(X) if (!(X)) {qWarning() << "Failed to verify expression" << # X;} 0056 #else 0057 #define VERIFY(X) Q_ASSERT(X) 0058 #endif 0059 0060 ///When this is uncommented, a 64-bit test-value is written behind the area an item is allowed to write into before 0061 ///createItem(..) is called, and an assertion triggers when it was changed during createItem(), which means createItem wrote too long. 0062 ///The problem: This temporarily overwrites valid data in the following item, so it will cause serious problems if that data is accessed 0063 ///during the call to createItem(). 0064 // #define DEBUG_WRITING_EXTENTS 0065 0066 class TestItemRepository; 0067 class BenchItemRepository; 0068 0069 namespace KDevelop { 0070 namespace ItemRepositoryUtils { 0071 class FileMap 0072 { 0073 Q_DISABLE_COPY_MOVE(FileMap) 0074 public: 0075 explicit FileMap(char* data) 0076 : m_data(data) 0077 { 0078 Q_ASSERT(m_data); 0079 } 0080 0081 template<typename T> 0082 void readValue(T* to) 0083 { 0084 Q_ASSERT(to); 0085 static_assert(std::is_integral_v<T>); 0086 0087 *to = *reinterpret_cast<T*>(m_data); 0088 m_data += sizeof(T); 0089 } 0090 0091 template<typename T> 0092 void readArray(T** to, uint size) 0093 { 0094 Q_ASSERT(to); 0095 static_assert(std::is_integral_v<T>); 0096 0097 *to = reinterpret_cast<T*>(m_data); 0098 m_data += sizeof(T) * size; 0099 } 0100 0101 char* current() const 0102 { 0103 return m_data; 0104 } 0105 0106 private: 0107 char* m_data; 0108 }; 0109 0110 template<class T> 0111 void readValues(QIODevice* file, uint numValues, T* to) 0112 { 0113 Q_ASSERT(file); 0114 Q_ASSERT(to); 0115 static_assert(std::is_integral_v<T>); 0116 0117 file->read(reinterpret_cast<char*>(to), sizeof(T) * numValues); 0118 } 0119 0120 template<class T> 0121 void readValue(QIODevice* file, T* to) 0122 { 0123 readValues(file, 1, to); 0124 } 0125 0126 template<typename T> 0127 void readList(QIODevice* file, QVector<T>* to) 0128 { 0129 Q_ASSERT(to); 0130 readValues(file, to->size(), to->data()); 0131 } 0132 0133 template<class T> 0134 void writeValues(QIODevice* file, uint numValues, const T* from) 0135 { 0136 Q_ASSERT(file); 0137 Q_ASSERT(from); 0138 static_assert(std::is_integral_v<T>); 0139 0140 file->write(reinterpret_cast<const char*>(from), sizeof(T) * numValues); 0141 } 0142 0143 template<class T> 0144 void writeValue(QIODevice* file, const T& from) 0145 { 0146 writeValues(file, 1, &from); 0147 } 0148 0149 template<typename T> 0150 void writeList(QIODevice* file, const QVector<T>& from) 0151 { 0152 writeValues(file, from.size(), from.data()); 0153 } 0154 } 0155 /** 0156 * This file implements a generic bucket-based indexing repository, that can be used for example to index strings. 0157 * 0158 * All you need to do is define your item type that you want to store into the repository, and create a request item 0159 * similar to ExampleItemRequest that compares and fills the defined item type. 0160 * 0161 * For example the string repository uses "unsigned short" as item-type, uses that actual value to store the length of the string, 0162 * and uses the space behind to store the actual string content. 0163 * 0164 * @see AbstractItemRepository 0165 * @see ItemRepository 0166 * 0167 * @see ExampleItem 0168 * @see ExampleItemRequest 0169 * 0170 * @see typerepository.h 0171 * @see stringrepository.h 0172 * @see indexedstring.h 0173 */ 0174 0175 enum { 0176 ItemRepositoryBucketSize = 1 << 16, 0177 ItemRepositoryBucketLimit = 1 << 16, 0178 ItemRepositoryBucketLinearGrowthFactor = 10, 0179 }; 0180 0181 /** 0182 * Buckets are the memory-units that are used to store the data in an ItemRepository. 0183 * 0184 * About monster buckets: Normally a bucket has a size of 64kb, but when an item is 0185 * allocated that is larger than that, a "monster bucket" is allocated, which spans the 0186 * space of multiple buckets. 0187 */ 0188 template <class Item, class ItemRequest, bool markForReferenceCounting, uint fixedItemSize> 0189 class Bucket 0190 { 0191 public: 0192 enum { 0193 AdditionalSpacePerItem = 2 0194 }; 0195 enum { 0196 ObjectMapSize = ((ItemRepositoryBucketSize / ItemRequest::AverageSize) * 3) / 2 + 1, 0197 MaxFreeItemsForHide = 0, //When less than this count of free items in one buckets is reached, the bucket is removed from the global list of buckets with free items 0198 MaxFreeSizeForHide = fixedItemSize ? fixedItemSize : 0, //Only when the largest free size is smaller then this, the bucket is taken from the free list 0199 MinFreeItemsForReuse = 10,//When this count of free items in one bucket is reached, consider re-assigning them to new requests 0200 MinFreeSizeForReuse = ItemRepositoryBucketSize / 20 //When the largest free item is bigger then this, the bucket is automatically added to the free list 0201 }; 0202 enum { 0203 NextBucketHashSize = ObjectMapSize, //Affects the average count of bucket-chains that need to be walked in ItemRepository::index. Must be a multiple of ObjectMapSize 0204 DataSize = sizeof(char) + sizeof(unsigned int) * 3 + ItemRepositoryBucketSize + sizeof(short unsigned int) * 0205 (ObjectMapSize + NextBucketHashSize + 1) 0206 }; 0207 enum { 0208 CheckStart = 0xff00ff1, 0209 CheckEnd = 0xfafcfb 0210 }; 0211 Bucket() 0212 { 0213 } 0214 Q_DISABLE_COPY_MOVE(Bucket) 0215 ~Bucket() 0216 { 0217 if (m_data != m_mappedData) { 0218 delete[] m_data; 0219 delete[] m_nextBucketHash; 0220 delete[] m_objectMap; 0221 } 0222 } 0223 0224 void initialize(int monsterBucketExtent, std::unique_ptr<short unsigned int[]> nextBucketHashToRestore = {}) 0225 { 0226 if (!m_data) { 0227 m_monsterBucketExtent = monsterBucketExtent; 0228 m_available = ItemRepositoryBucketSize; 0229 m_data = new char[dataSize()]; 0230 #ifndef QT_NO_DEBUG 0231 std::fill_n(m_data, dataSize(), 0); 0232 #endif 0233 //The bigger we make the map, the lower the probability of a clash(and thus bad performance). However it increases memory usage. 0234 // NOTE: the `()` at the end of `new int[...]()` ensures the data is zero-initialized, see e.g.: 0235 // https://stackoverflow.com/questions/7546620/operator-new-initializes-memory-to-zero 0236 m_objectMap = new short unsigned int[ObjectMapSize](); 0237 0238 if (nextBucketHashToRestore) { 0239 m_nextBucketHash = nextBucketHashToRestore.release(); 0240 } else { 0241 m_nextBucketHash = new short unsigned int[NextBucketHashSize](); 0242 } 0243 0244 m_changed = true; 0245 m_dirty = false; 0246 m_lastUsed = 0; 0247 } 0248 } 0249 0250 void initializeFromMap(char* fileMapData) 0251 { 0252 if (m_data) { 0253 return; 0254 } 0255 0256 ItemRepositoryUtils::FileMap fileMap(fileMapData); 0257 0258 fileMap.readValue(&m_monsterBucketExtent); 0259 Q_ASSERT(fileMap.current() - fileMapData == 4); 0260 fileMap.readValue(&m_available); 0261 fileMap.readArray(&m_objectMap, ObjectMapSize); 0262 fileMap.readArray(&m_nextBucketHash, NextBucketHashSize); 0263 fileMap.readValue(&m_largestFreeItem); 0264 fileMap.readValue(&m_freeItemCount); 0265 fileMap.readValue(&m_dirty); 0266 0267 m_data = fileMap.current(); 0268 m_mappedData = m_data; 0269 0270 m_changed = false; 0271 m_lastUsed = 0; 0272 Q_ASSERT(fileMap.current() - fileMapData == DataSize - ItemRepositoryBucketSize); 0273 } 0274 0275 void store(QFile* file, size_t offset) 0276 { 0277 using namespace ItemRepositoryUtils; 0278 0279 if (!m_data) 0280 return; 0281 0282 if (static_cast<size_t>(file->size()) < offset + (1 + m_monsterBucketExtent) * DataSize) 0283 file->resize(offset + (1 + m_monsterBucketExtent) * DataSize); 0284 0285 file->seek(offset); 0286 0287 writeValue(file, m_monsterBucketExtent); 0288 writeValue(file, m_available); 0289 writeValues(file, ObjectMapSize, m_objectMap); 0290 writeValues(file, NextBucketHashSize, m_nextBucketHash); 0291 writeValue(file, m_largestFreeItem); 0292 writeValue(file, m_freeItemCount); 0293 writeValue(file, m_dirty); 0294 writeValues(file, dataSize(), m_data); 0295 0296 if (static_cast<size_t>(file->pos()) != offset + (1 + m_monsterBucketExtent) * DataSize) { 0297 KMessageBox::error(nullptr, i18n("Failed writing to %1, probably the disk is full", file->fileName())); 0298 abort(); 0299 } 0300 0301 m_changed = false; 0302 #ifdef DEBUG_ITEMREPOSITORY_LOADING 0303 { 0304 file->flush(); 0305 file->seek(offset); 0306 0307 uint available, freeItemCount; 0308 int monsterBucketExtent; 0309 short unsigned int largestFree; 0310 bool dirty; 0311 0312 short unsigned int* m = new short unsigned int[ObjectMapSize]; 0313 short unsigned int* h = new short unsigned int[NextBucketHashSize]; 0314 0315 readValue(file, &monsterBucketExtent); 0316 Q_ASSERT(monsterBucketExtent == m_monsterBucketExtent); 0317 0318 char* d = new char[dataSize()]; 0319 0320 readValue(file, &available); 0321 readValues(file, ObjectMapSize, m); 0322 readValues(file, NextBucketHashSize, h); 0323 readValue(file, &largestFree); 0324 readValue(file, &freeItemCount); 0325 readValue(file, &dirty); 0326 readValues(file, dataSize(), d); 0327 0328 Q_ASSERT(available == m_available); 0329 Q_ASSERT(std::equal(d, std::next(d, dataSize()), m_data)); 0330 Q_ASSERT(std::equal(m, std::next(m, ObjectMapSize), m_objectMap)); 0331 Q_ASSERT(std::equal(h, std::next(h, NextBucketHashSize), m_nextBucketHash)); 0332 Q_ASSERT(m_largestFreeItem == largestFree); 0333 Q_ASSERT(m_freeItemCount == freeItemCount); 0334 Q_ASSERT(m_dirty == dirty); 0335 0336 Q_ASSERT(static_cast<size_t>(file->pos()) == offset + DataSize + m_monsterBucketExtent * DataSize); 0337 0338 delete[] d; 0339 delete[] m; 0340 delete[] h; 0341 } 0342 #endif 0343 } 0344 0345 inline char* data() 0346 { 0347 return m_data; 0348 } 0349 0350 inline uint dataSize() const 0351 { 0352 return ItemRepositoryBucketSize + m_monsterBucketExtent * DataSize; 0353 } 0354 0355 //Tries to find the index this item has in this bucket, or returns zero if the item isn't there yet. 0356 unsigned short findIndex(const ItemRequest& request) const 0357 { 0358 m_lastUsed = 0; 0359 0360 unsigned short localHash = request.hash() % ObjectMapSize; 0361 unsigned short index = m_objectMap[localHash]; 0362 0363 unsigned short follower = 0; 0364 //Walk the chain of items with the same local hash 0365 while (index && (follower = followerIndex(index)) && !(request.equals(itemFromIndex(index)))) 0366 index = follower; 0367 0368 if (index && request.equals(itemFromIndex(index))) { 0369 return index; //We have found the item 0370 } 0371 0372 return 0; 0373 } 0374 0375 //Tries to get the index within this bucket, or returns zero. Will put the item into the bucket if there is room. 0376 //Created indices will never begin with 0xffff____, so you can use that index-range for own purposes. 0377 unsigned short index(const ItemRequest& request, unsigned int itemSize) 0378 { 0379 m_lastUsed = 0; 0380 0381 unsigned short localHash = request.hash() % ObjectMapSize; 0382 unsigned short index = m_objectMap[localHash]; 0383 unsigned short insertedAt = 0; 0384 0385 const auto createInsertedItem = [&]() { 0386 const OptionalDUChainReferenceCountingEnabler<markForReferenceCounting> optionalRc(m_data, dataSize()); 0387 request.createItem(reinterpret_cast<Item*>(m_data + insertedAt)); 0388 }; 0389 0390 unsigned short follower = 0; 0391 //Walk the chain of items with the same local hash 0392 while (index && (follower = followerIndex(index)) && !(request.equals(itemFromIndex(index)))) 0393 index = follower; 0394 0395 if (index && request.equals(itemFromIndex(index))) 0396 return index; //We have found the item 0397 0398 ifDebugLostSpace(Q_ASSERT(!lostSpace()); ) 0399 0400 prepareChange(); 0401 0402 unsigned int totalSize = itemSize + AdditionalSpacePerItem; 0403 0404 if (m_monsterBucketExtent) { 0405 /// This is a monster-bucket. Other rules are applied here: 0406 /// Only one item can be allocated, and that must be bigger than the bucket data 0407 if (!m_available) 0408 return 0; 0409 0410 Q_ASSERT(totalSize > ItemRepositoryBucketSize); 0411 m_available = 0; 0412 0413 insertedAt = AdditionalSpacePerItem; 0414 setFollowerIndex(insertedAt, 0); 0415 Q_ASSERT(m_objectMap[localHash] == 0); 0416 m_objectMap[localHash] = insertedAt; 0417 createInsertedItem(); 0418 return insertedAt; 0419 } 0420 0421 //The second condition is needed, else we can get problems with zero-length items and an overflow in insertedAt to zero 0422 if (totalSize > m_available || (!itemSize && totalSize == m_available)) { 0423 //Try finding the smallest freed item that can hold the data 0424 unsigned short currentIndex = m_largestFreeItem; 0425 unsigned short previousIndex = 0; 0426 0427 unsigned short freeChunkSize = 0; 0428 0429 ///@todo Achieve this without full iteration 0430 while (currentIndex && freeSize(currentIndex) > itemSize) { 0431 unsigned short follower = followerIndex(currentIndex); 0432 if (follower && freeSize(follower) >= itemSize) { 0433 //The item also fits into the smaller follower, so use that one 0434 previousIndex = currentIndex; 0435 currentIndex = follower; 0436 } else { 0437 //The item fits into currentIndex, but not into the follower. So use currentIndex 0438 freeChunkSize = freeSize(currentIndex) - itemSize; 0439 0440 //We need 2 bytes to store the free size 0441 if (freeChunkSize != 0 && freeChunkSize < AdditionalSpacePerItem + 2) { 0442 //we can not manage the resulting free chunk as a separate item, so we cannot use this position. 0443 //Just pick the biggest free item, because there we can be sure that 0444 //either we can manage the split, or we cannot do anything at all in this bucket. 0445 0446 freeChunkSize = freeSize(m_largestFreeItem) - itemSize; 0447 0448 if (freeChunkSize == 0 || freeChunkSize >= AdditionalSpacePerItem + 2) { 0449 previousIndex = 0; 0450 currentIndex = m_largestFreeItem; 0451 } else { 0452 currentIndex = 0; 0453 } 0454 } 0455 break; 0456 } 0457 } 0458 0459 if (!currentIndex || freeSize(currentIndex) < (totalSize - AdditionalSpacePerItem)) 0460 return 0; 0461 0462 if (previousIndex) 0463 setFollowerIndex(previousIndex, followerIndex(currentIndex)); 0464 else 0465 m_largestFreeItem = followerIndex(currentIndex); 0466 0467 --m_freeItemCount; //Took one free item out of the chain 0468 0469 ifDebugLostSpace(Q_ASSERT(( uint )lostSpace() == ( uint )(freeSize(currentIndex) + AdditionalSpacePerItem)); 0470 ) 0471 0472 if (freeChunkSize) { 0473 Q_ASSERT(freeChunkSize >= AdditionalSpacePerItem + 2); 0474 unsigned short freeItemSize = freeChunkSize - AdditionalSpacePerItem; 0475 0476 unsigned short freeItemPosition; 0477 //Insert the resulting free chunk into the list of free items, so we don't lose it 0478 if (isBehindFreeSpace(currentIndex)) { 0479 //Create the free item at the beginning of currentIndex, so it can be merged with the free space in front 0480 freeItemPosition = currentIndex; 0481 currentIndex += freeItemSize + AdditionalSpacePerItem; 0482 } else { 0483 //Create the free item behind currentIndex 0484 freeItemPosition = currentIndex + itemSize + AdditionalSpacePerItem; 0485 } 0486 setFreeSize(freeItemPosition, freeItemSize); 0487 insertFreeItem(freeItemPosition); 0488 } 0489 0490 insertedAt = currentIndex; 0491 Q_ASSERT(( bool )m_freeItemCount == ( bool )m_largestFreeItem); 0492 } else { 0493 //We have to insert the item 0494 insertedAt = ItemRepositoryBucketSize - m_available; 0495 0496 insertedAt += AdditionalSpacePerItem; //Room for the prepended follower-index 0497 0498 m_available -= totalSize; 0499 } 0500 0501 ifDebugLostSpace(Q_ASSERT(lostSpace() == totalSize); ) 0502 0503 Q_ASSERT(!index || !followerIndex(index)); 0504 0505 Q_ASSERT(!m_objectMap[localHash] || index); 0506 0507 if (index) 0508 setFollowerIndex(index, insertedAt); 0509 setFollowerIndex(insertedAt, 0); 0510 0511 if (m_objectMap[localHash] == 0) 0512 m_objectMap[localHash] = insertedAt; 0513 0514 #ifdef DEBUG_CREATEITEM_EXTENTS 0515 char* borderBehind = m_data + insertedAt + (totalSize - AdditionalSpacePerItem); 0516 0517 quint64 oldValueBehind = 0; 0518 if (m_available >= 8) { 0519 oldValueBehind = *( quint64* )borderBehind; 0520 *(( quint64* )borderBehind) = 0xfafafafafafafafaLLU; 0521 } 0522 #endif 0523 0524 //Last thing we do, because createItem may recursively do even more transformation of the repository 0525 createInsertedItem(); 0526 0527 #ifdef DEBUG_CREATEITEM_EXTENTS 0528 if (m_available >= 8) { 0529 //If this assertion triggers, then the item writes a bigger range than it advertised in 0530 Q_ASSERT(*(( quint64* )borderBehind) == 0xfafafafafafafafaLLU); 0531 *(( quint64* )borderBehind) = oldValueBehind; 0532 } 0533 #endif 0534 0535 Q_ASSERT(itemFromIndex(insertedAt)->hash() == request.hash()); 0536 Q_ASSERT(itemFromIndex(insertedAt)->itemSize() == itemSize); 0537 0538 ifDebugLostSpace(if (lostSpace()) qDebug() << "lost space:" << lostSpace(); Q_ASSERT(!lostSpace()); ) 0539 0540 return insertedAt; 0541 } 0542 0543 /// @param modulo Returns whether this bucket contains an item with (hash % modulo) == (item.hash % modulo) 0544 /// The default-parameter is the size of the next-bucket hash that is used by setNextBucketForHash and nextBucketForHash 0545 /// @note modulo MUST be a multiple of ObjectMapSize, because (b-a) | (x * h1) => (b-a) | h2, where a|b means a is a multiple of b. 0546 /// This allows efficiently computing the clashes using the local object map hash. 0547 bool hasClashingItem(uint hash, uint modulo) 0548 { 0549 Q_ASSERT(modulo % ObjectMapSize == 0); 0550 0551 m_lastUsed = 0; 0552 0553 uint hashMod = hash % modulo; 0554 unsigned short localHash = hash % ObjectMapSize; 0555 unsigned short currentIndex = m_objectMap[localHash]; 0556 0557 if (currentIndex == 0) 0558 return false; 0559 0560 while (currentIndex) { 0561 uint currentHash = itemFromIndex(currentIndex)->hash(); 0562 0563 Q_ASSERT(currentHash % ObjectMapSize == localHash); 0564 0565 if (currentHash % modulo == hashMod) 0566 return true; //Clash 0567 currentIndex = followerIndex(currentIndex); 0568 } 0569 return false; 0570 } 0571 0572 void countFollowerIndexLengths(uint& usedSlots, uint& lengths, uint& slotCount, uint& longestInBucketFollowerChain) 0573 { 0574 for (uint a = 0; a < ObjectMapSize; ++a) { 0575 unsigned short currentIndex = m_objectMap[a]; 0576 ++slotCount; 0577 uint length = 0; 0578 0579 if (currentIndex) { 0580 ++usedSlots; 0581 0582 while (currentIndex) { 0583 ++length; 0584 ++lengths; 0585 currentIndex = followerIndex(currentIndex); 0586 } 0587 if (length > longestInBucketFollowerChain) { 0588 // qDebug() << "follower-chain at" << a << ":" << length; 0589 0590 longestInBucketFollowerChain = length; 0591 } 0592 } 0593 } 0594 } 0595 0596 //Returns whether the given item is reachabe within this bucket, through its hash. 0597 bool itemReachable(const Item* item, uint hash) const 0598 { 0599 unsigned short localHash = hash % ObjectMapSize; 0600 unsigned short currentIndex = m_objectMap[localHash]; 0601 0602 while (currentIndex) { 0603 if (itemFromIndex(currentIndex) == item) 0604 return true; 0605 0606 currentIndex = followerIndex(currentIndex); 0607 } 0608 return false; 0609 } 0610 0611 template <class Repository> 0612 void deleteItem(unsigned short index, unsigned int hash, Repository& repository) 0613 { 0614 ifDebugLostSpace(Q_ASSERT(!lostSpace()); ) 0615 0616 m_lastUsed = 0; 0617 prepareChange(); 0618 0619 unsigned int size = itemFromIndex(index)->itemSize(); 0620 //Step 1: Remove the item from the data-structures that allow finding it: m_objectMap 0621 unsigned short localHash = hash % ObjectMapSize; 0622 unsigned short currentIndex = m_objectMap[localHash]; 0623 unsigned short previousIndex = 0; 0624 0625 //Fix the follower-link by setting the follower of the previous item to the next one, or updating m_objectMap 0626 while (currentIndex != index) { 0627 previousIndex = currentIndex; 0628 currentIndex = followerIndex(currentIndex); 0629 //If this assertion triggers, the deleted item was not registered under the given hash 0630 Q_ASSERT(currentIndex); 0631 } 0632 Q_ASSERT(currentIndex == index); 0633 0634 if (!previousIndex) 0635 //The item was directly in the object map 0636 m_objectMap[localHash] = followerIndex(index); 0637 else 0638 setFollowerIndex(previousIndex, followerIndex(index)); 0639 0640 Item* item = const_cast<Item*>(itemFromIndex(index)); 0641 0642 { 0643 const OptionalDUChainReferenceCountingEnabler<markForReferenceCounting> optionalRc(m_data, dataSize()); 0644 ItemRequest::destroy(item, repository); 0645 } 0646 0647 #ifndef QT_NO_DEBUG 0648 #if defined(__GNUC__) && !defined(__INTEL_COMPILER) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 800) 0649 #pragma GCC diagnostic push 0650 #pragma GCC diagnostic ignored "-Wclass-memaccess" 0651 #endif 0652 //For debugging, so we notice the data is wrong 0653 std::fill_n(reinterpret_cast<char*>(item), size, 0); 0654 #if defined(__GNUC__) && !defined(__INTEL_COMPILER) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 800) 0655 #pragma GCC diagnostic pop 0656 #endif 0657 #endif 0658 0659 if (m_monsterBucketExtent) { 0660 ///This is a monster-bucket. Make it completely empty again. 0661 Q_ASSERT(!m_available); 0662 m_available = ItemRepositoryBucketSize; 0663 0664 //Items are always inserted into monster-buckets at a fixed position 0665 Q_ASSERT(currentIndex == AdditionalSpacePerItem); 0666 Q_ASSERT(m_objectMap[localHash] == 0); 0667 } else { 0668 ///Put the space into the free-set 0669 setFreeSize(index, size); 0670 0671 //Try merging the created free item to other free items around, else add it into the free list 0672 insertFreeItem(index); 0673 0674 if (m_freeItemCount == 1 && freeSize(m_largestFreeItem) + m_available == ItemRepositoryBucketSize) { 0675 //Everything has been deleted, there is only free space left. Make the bucket empty again, 0676 //so it can later also be used as a monster-bucket. 0677 m_available = ItemRepositoryBucketSize; 0678 m_freeItemCount = 0; 0679 m_largestFreeItem = 0; 0680 } 0681 } 0682 0683 Q_ASSERT(( bool )m_freeItemCount == ( bool )m_largestFreeItem); 0684 ifDebugLostSpace(Q_ASSERT(!lostSpace()); ) 0685 #ifdef DEBUG_INCORRECT_DELETE 0686 //Make sure the item cannot be found any more 0687 { 0688 unsigned short localHash = hash % ObjectMapSize; 0689 unsigned short currentIndex = m_objectMap[localHash]; 0690 0691 while (currentIndex && currentIndex != index) { 0692 previousIndex = currentIndex; 0693 currentIndex = followerIndex(currentIndex); 0694 } 0695 Q_ASSERT(!currentIndex); //The item must not be found 0696 } 0697 #endif 0698 // Q_ASSERT(canAllocateItem(size)); 0699 } 0700 0701 ///@warning The returned item may be in write-protected memory, so never try doing a const_cast and changing some data 0702 /// If you need to change something, use dynamicItemFromIndex 0703 ///@warning When using multi-threading, mutex() must be locked as long as you use the returned data 0704 inline const Item* itemFromIndex(unsigned short index) const 0705 { 0706 m_lastUsed = 0; 0707 return reinterpret_cast<Item*>(m_data + index); 0708 } 0709 0710 bool isEmpty() const 0711 { 0712 return m_available == ItemRepositoryBucketSize; 0713 } 0714 0715 ///Returns true if this bucket has no nextBucketForHash links 0716 bool noNextBuckets() const 0717 { 0718 if (!m_nextBucketHash) 0719 return true; 0720 0721 for (int a = 0; a < NextBucketHashSize; ++a) 0722 if (m_nextBucketHash[a]) 0723 return false; 0724 0725 return true; 0726 } 0727 0728 std::unique_ptr<short unsigned int[]> takeNextBucketHash() 0729 { 0730 if (m_mappedData == m_data) { 0731 // mmapped data, we need to copy the next bucket hash 0732 auto ret = std::make_unique<short unsigned int[]>(NextBucketHashSize); 0733 std::copy_n(m_nextBucketHash, NextBucketHashSize, ret.get()); 0734 std::fill_n(m_nextBucketHash, NextBucketHashSize, 0); 0735 return ret; 0736 } 0737 0738 // otherwise we can just take the pointer directly 0739 return std::unique_ptr<short unsigned int[]>(std::exchange(m_nextBucketHash, nullptr)); 0740 } 0741 0742 uint available() const 0743 { 0744 return m_available; 0745 } 0746 0747 uint usedMemory() const 0748 { 0749 return ItemRepositoryBucketSize - m_available; 0750 } 0751 0752 template <class Visitor> 0753 bool visitAllItems(Visitor& visitor) const 0754 { 0755 m_lastUsed = 0; 0756 for (uint a = 0; a < ObjectMapSize; ++a) { 0757 uint currentIndex = m_objectMap[a]; 0758 while (currentIndex) { 0759 //Get the follower early, so there is no problems when the current 0760 //index is removed 0761 0762 if (!visitor(reinterpret_cast<const Item*>(m_data + currentIndex))) 0763 return false; 0764 0765 currentIndex = followerIndex(currentIndex); 0766 } 0767 } 0768 0769 return true; 0770 } 0771 0772 ///Returns whether something was changed 0773 template <class Repository> 0774 int finalCleanup(Repository& repository) 0775 { 0776 int changed = 0; 0777 0778 while (m_dirty) { 0779 m_dirty = false; 0780 0781 for (uint a = 0; a < ObjectMapSize; ++a) { 0782 uint currentIndex = m_objectMap[a]; 0783 while (currentIndex) { 0784 //Get the follower early, so there is no problems when the current 0785 //index is removed 0786 0787 const Item* item = reinterpret_cast<const Item*>(m_data + currentIndex); 0788 0789 if (!ItemRequest::persistent(item)) { 0790 changed += item->itemSize(); 0791 deleteItem(currentIndex, item->hash(), repository); 0792 m_dirty = true; //Set to dirty so we re-iterate 0793 break; 0794 } 0795 0796 currentIndex = followerIndex(currentIndex); 0797 } 0798 } 0799 } 0800 return changed; 0801 } 0802 0803 unsigned short nextBucketForHash(uint hash) const 0804 { 0805 m_lastUsed = 0; 0806 return m_nextBucketHash[hash % NextBucketHashSize]; 0807 } 0808 0809 void setNextBucketForHash(unsigned int hash, unsigned short bucket) 0810 { 0811 m_lastUsed = 0; 0812 prepareChange(); 0813 m_nextBucketHash[hash % NextBucketHashSize] = bucket; 0814 } 0815 0816 uint freeItemCount() const 0817 { 0818 return m_freeItemCount; 0819 } 0820 0821 short unsigned int totalFreeItemsSize() const 0822 { 0823 short unsigned int ret = 0; 0824 short unsigned int currentIndex = m_largestFreeItem; 0825 while (currentIndex) { 0826 ret += freeSize(currentIndex); 0827 currentIndex = followerIndex(currentIndex); 0828 } 0829 return ret; 0830 } 0831 0832 //Size of the largest item that could be inserted into this bucket 0833 short unsigned int largestFreeSize() const 0834 { 0835 short unsigned int ret = 0; 0836 if (m_largestFreeItem) 0837 ret = freeSize(m_largestFreeItem); 0838 if (m_available > ( uint )(AdditionalSpacePerItem + ( uint )ret)) { 0839 ret = m_available - AdditionalSpacePerItem; 0840 Q_ASSERT(ret == (m_available - AdditionalSpacePerItem)); 0841 } 0842 return ret; 0843 } 0844 0845 bool canAllocateItem(unsigned int size) const 0846 { 0847 short unsigned int currentIndex = m_largestFreeItem; 0848 while (currentIndex) { 0849 short unsigned int currentFree = freeSize(currentIndex); 0850 if (currentFree < size) 0851 return false; 0852 //Either we need an exact match, or 2 additional bytes to manage the resulting gap 0853 if (size == currentFree || currentFree - size >= AdditionalSpacePerItem + 2) 0854 return true; 0855 currentIndex = followerIndex(currentIndex); 0856 } 0857 0858 return false; 0859 } 0860 0861 void tick() const 0862 { 0863 ++m_lastUsed; 0864 } 0865 0866 //How many ticks ago the item was last used 0867 int lastUsed() const 0868 { 0869 return m_lastUsed; 0870 } 0871 0872 //Whether this bucket was changed since it was last stored 0873 bool changed() const 0874 { 0875 return m_changed; 0876 } 0877 0878 void prepareChange() 0879 { 0880 m_changed = true; 0881 m_dirty = true; 0882 makeDataPrivate(); 0883 } 0884 0885 bool dirty() const 0886 { 0887 return m_dirty; 0888 } 0889 0890 ///Returns the count of following buckets that were merged onto this buckets data array 0891 int monsterBucketExtent() const 0892 { 0893 return m_monsterBucketExtent; 0894 } 0895 0896 //Counts together the space that is neither accessible through m_objectMap nor through the free items 0897 uint lostSpace() 0898 { 0899 if (m_monsterBucketExtent) 0900 return 0; 0901 0902 uint need = ItemRepositoryBucketSize - m_available; 0903 uint found = 0; 0904 0905 for (uint a = 0; a < ObjectMapSize; ++a) { 0906 uint currentIndex = m_objectMap[a]; 0907 while (currentIndex) { 0908 found += reinterpret_cast<const Item*>(m_data + currentIndex)->itemSize() + AdditionalSpacePerItem; 0909 0910 currentIndex = followerIndex(currentIndex); 0911 } 0912 } 0913 0914 uint currentIndex = m_largestFreeItem; 0915 while (currentIndex) { 0916 found += freeSize(currentIndex) + AdditionalSpacePerItem; 0917 0918 currentIndex = followerIndex(currentIndex); 0919 } 0920 return need - found; 0921 } 0922 0923 private: 0924 0925 void makeDataPrivate() 0926 { 0927 if (m_mappedData == m_data) { 0928 short unsigned int* oldObjectMap = m_objectMap; 0929 short unsigned int* oldNextBucketHash = m_nextBucketHash; 0930 0931 m_data = new char[dataSize()]; 0932 m_objectMap = new short unsigned int[ObjectMapSize]; 0933 m_nextBucketHash = new short unsigned int[NextBucketHashSize]; 0934 0935 std::copy_n(m_mappedData, dataSize(), m_data); 0936 std::copy_n(oldObjectMap, ObjectMapSize, m_objectMap); 0937 std::copy_n(oldNextBucketHash, NextBucketHashSize, m_nextBucketHash); 0938 } 0939 } 0940 0941 ///Merges the given index item, which must have a freeSize() set, to surrounding free items, and inserts the result. 0942 ///The given index itself should not be in the free items chain yet. 0943 ///Returns whether the item was inserted somewhere. 0944 void insertFreeItem(unsigned short index) 0945 { 0946 //If the item-size is fixed, we don't need to do any management. Just keep a list of free items. Items of other size will never be requested. 0947 if (!fixedItemSize) { 0948 unsigned short currentIndex = m_largestFreeItem; 0949 unsigned short previousIndex = 0; 0950 0951 while (currentIndex) { 0952 Q_ASSERT(currentIndex != index); 0953 0954 #ifndef QT_NO_DEBUG 0955 unsigned short currentFreeSize = freeSize(currentIndex); 0956 #endif 0957 0958 ///@todo Achieve this without iterating through all items in the bucket(This is very slow) 0959 //Merge behind index 0960 if (currentIndex == index + freeSize(index) + AdditionalSpacePerItem) { 0961 //Remove currentIndex from the free chain, since it's merged backwards into index 0962 if (previousIndex && followerIndex(currentIndex)) 0963 Q_ASSERT(freeSize(previousIndex) >= freeSize(followerIndex(currentIndex))); 0964 0965 if (previousIndex) 0966 setFollowerIndex(previousIndex, followerIndex(currentIndex)); 0967 else 0968 m_largestFreeItem = followerIndex(currentIndex); 0969 0970 --m_freeItemCount; //One was removed 0971 0972 //currentIndex is directly behind index, touching its space. Merge them. 0973 setFreeSize(index, freeSize(index) + AdditionalSpacePerItem + freeSize(currentIndex)); 0974 0975 //Recurse to do even more merging 0976 insertFreeItem(index); 0977 return; 0978 } 0979 0980 //Merge before index 0981 if (index == currentIndex + freeSize(currentIndex) + AdditionalSpacePerItem) { 0982 if (previousIndex && followerIndex(currentIndex)) 0983 Q_ASSERT(freeSize(previousIndex) >= freeSize(followerIndex(currentIndex))); 0984 0985 //Remove currentIndex from the free chain, since insertFreeItem wants 0986 //it not to be in the chain, and it will be inserted in another place 0987 if (previousIndex) 0988 setFollowerIndex(previousIndex, followerIndex(currentIndex)); 0989 else 0990 m_largestFreeItem = followerIndex(currentIndex); 0991 0992 --m_freeItemCount; //One was removed 0993 0994 //index is directly behind currentIndex, touching its space. Merge them. 0995 setFreeSize(currentIndex, freeSize(currentIndex) + AdditionalSpacePerItem + freeSize(index)); 0996 0997 //Recurse to do even more merging 0998 insertFreeItem(currentIndex); 0999 return; 1000 } 1001 1002 previousIndex = currentIndex; 1003 currentIndex = followerIndex(currentIndex); 1004 #ifndef QT_NO_DEBUG 1005 if (currentIndex) 1006 Q_ASSERT(freeSize(currentIndex) <= currentFreeSize); 1007 #endif 1008 } 1009 } 1010 insertToFreeChain(index); 1011 } 1012 1013 ///Only inserts the item in the correct position into the free chain. index must not be in the chain yet. 1014 void insertToFreeChain(unsigned short index) 1015 { 1016 if (!fixedItemSize) { 1017 ///@todo Use some kind of tree to find the correct position in the chain(This is very slow) 1018 //Insert the free item into the chain opened by m_largestFreeItem 1019 unsigned short currentIndex = m_largestFreeItem; 1020 unsigned short previousIndex = 0; 1021 1022 unsigned short size = freeSize(index); 1023 1024 while (currentIndex && freeSize(currentIndex) > size) { 1025 Q_ASSERT(currentIndex != index); //must not be in the chain yet 1026 previousIndex = currentIndex; 1027 currentIndex = followerIndex(currentIndex); 1028 } 1029 1030 if (currentIndex) 1031 Q_ASSERT(freeSize(currentIndex) <= size); 1032 1033 setFollowerIndex(index, currentIndex); 1034 1035 if (previousIndex) { 1036 Q_ASSERT(freeSize(previousIndex) >= size); 1037 setFollowerIndex(previousIndex, index); 1038 } else 1039 //This item is larger than all already registered free items, or there are none. 1040 m_largestFreeItem = index; 1041 } else { 1042 Q_ASSERT(freeSize(index) == fixedItemSize); 1043 //When all items have the same size, just prepent to the front. 1044 setFollowerIndex(index, m_largestFreeItem); 1045 m_largestFreeItem = index; 1046 } 1047 1048 ++m_freeItemCount; 1049 } 1050 1051 /// Returns true if the given index is right behind free space, and thus can be merged to the free space. 1052 bool isBehindFreeSpace(unsigned short index) const 1053 { 1054 // TODO: Without iteration! 1055 unsigned short currentIndex = m_largestFreeItem; 1056 1057 while (currentIndex) { 1058 if (index == currentIndex + freeSize(currentIndex) + AdditionalSpacePerItem) 1059 return true; 1060 1061 currentIndex = followerIndex(currentIndex); 1062 } 1063 return false; 1064 } 1065 1066 /// @param index the index of an item @return The index of the next item in the chain of items with a same local hash, or zero 1067 inline unsigned short followerIndex(unsigned short index) const 1068 { 1069 Q_ASSERT(index >= 2); 1070 return *reinterpret_cast<unsigned short*>(m_data + (index - 2)); 1071 } 1072 1073 void setFollowerIndex(unsigned short index, unsigned short follower) 1074 { 1075 Q_ASSERT(index >= 2); 1076 *reinterpret_cast<unsigned short*>(m_data + (index - 2)) = follower; 1077 } 1078 // Only returns the current value if the item is actually free 1079 inline unsigned short freeSize(unsigned short index) const 1080 { 1081 return *reinterpret_cast<unsigned short*>(m_data + index); 1082 } 1083 1084 //Convenience function to set the free-size, only for freed items 1085 void setFreeSize(unsigned short index, unsigned short size) 1086 { 1087 *reinterpret_cast<unsigned short*>(m_data + index) = size; 1088 } 1089 1090 private: 1091 int m_monsterBucketExtent = 0; //If this is a monster-bucket, this contains the count of follower-buckets that belong to this one 1092 unsigned int m_available = 0; 1093 char* m_data = nullptr; //Structure of the data: <Position of next item with same hash modulo ItemRepositoryBucketSize>(2 byte), <Item>(item.size() byte) 1094 char* m_mappedData = nullptr; //Read-only memory-mapped data. If this equals m_data, m_data must not be written 1095 short unsigned int* m_objectMap = nullptr; //Points to the first object in m_data with (hash % ObjectMapSize) == index. Points to the item itself, so subtract 1 to get the pointer to the next item with same local hash. 1096 short unsigned int m_largestFreeItem = 0; //Points to the largest item that is currently marked as free, or zero. That one points to the next largest one through followerIndex 1097 unsigned int m_freeItemCount = 0; 1098 1099 unsigned short* m_nextBucketHash = nullptr; 1100 1101 bool m_dirty = false; //Whether the data was changed since the last finalCleanup 1102 bool m_changed = false; //Whether this bucket was changed since it was last stored to disk 1103 mutable int m_lastUsed = 0; //How many ticks ago this bucket was last accessed 1104 }; 1105 1106 ///This object needs to be kept alive as long as you change the contents of an item 1107 ///stored in the repository. It is needed to correctly track the reference counting 1108 ///within disk-storage. 1109 template <class Item, bool markForReferenceCounting> 1110 class DynamicItem : public OptionalDUChainReferenceCountingEnabler<markForReferenceCounting> 1111 { 1112 public: 1113 explicit DynamicItem(Item* i, const void* start, unsigned size) 1114 : OptionalDUChainReferenceCountingEnabler<markForReferenceCounting>(start, size) 1115 , m_item{i} 1116 { 1117 // qDebug() << "enabling" << i << "to" << (void*)(((char*)i)+size); 1118 } 1119 1120 Item* operator->() const { return m_item; } 1121 1122 private: 1123 Item* const m_item; 1124 }; 1125 1126 struct ItemRepositoryStatistics { 1127 uint loadedBuckets = -1; 1128 uint currentBucket = -1; 1129 uint usedMemory = -1; 1130 uint loadedMonsterBuckets = -1; 1131 uint usedSpaceForBuckets = -1; 1132 uint freeSpaceInBuckets = -1; 1133 uint lostSpace = -1; 1134 uint freeUnreachableSpace = -1; 1135 uint hashClashedItems = -1; 1136 uint totalItems = -1; 1137 uint emptyBuckets; 1138 uint hashSize = -1; // How big the hash is 1139 uint hashUse = -1; // How many slots in the hash are used 1140 uint averageInBucketHashSize = -1; 1141 uint averageInBucketUsedSlotCount = -1; 1142 float averageInBucketSlotChainLength = -1; 1143 uint longestInBucketChain = -1; 1144 1145 uint longestNextBucketChain = -1; 1146 uint totalBucketFollowerSlots = -1; // Total count of used slots in the nextBucketForHash structure 1147 float averageNextBucketForHashSequenceLength 1148 = -1; // Average sequence length of a nextBucketForHash sequence(If not empty) 1149 1150 QString print() const 1151 { 1152 QString ret; 1153 ret += QStringLiteral("loaded buckets: %1 current bucket: %2 used memory: %3 loaded monster buckets: %4") 1154 .arg(loadedBuckets) 1155 .arg(currentBucket) 1156 .arg(usedMemory) 1157 .arg(loadedMonsterBuckets); 1158 ret += QStringLiteral("\nbucket hash clashed items: %1 total items: %2").arg(hashClashedItems).arg(totalItems); 1159 ret += QStringLiteral("\nused space for buckets: %1 free space in buckets: %2 lost space: %3") 1160 .arg(usedSpaceForBuckets) 1161 .arg(freeSpaceInBuckets) 1162 .arg(lostSpace); 1163 ret += QStringLiteral("\nfree unreachable space: %1 empty buckets: %2") 1164 .arg(freeUnreachableSpace) 1165 .arg(emptyBuckets); 1166 ret += QStringLiteral("\nhash size: %1 hash slots used: %2").arg(hashSize).arg(hashUse); 1167 ret += QStringLiteral("\naverage in-bucket hash size: %1 average in-bucket used hash slot count: %2 average " 1168 "in-bucket slot chain length: %3 longest in-bucket follower chain: %4") 1169 .arg(averageInBucketHashSize) 1170 .arg(averageInBucketUsedSlotCount) 1171 .arg(averageInBucketSlotChainLength) 1172 .arg(longestInBucketChain); 1173 ret += QStringLiteral("\ntotal count of used next-bucket-for-hash slots: %1 average next-bucket-for-hash " 1174 "sequence length: %2 longest next-bucket chain: %3") 1175 .arg(totalBucketFollowerSlots) 1176 .arg(averageNextBucketForHashSequenceLength) 1177 .arg(longestNextBucketChain); 1178 return ret; 1179 } 1180 operator QString() const { return print(); } 1181 }; 1182 1183 /** 1184 * The ItemRepository is essentially an on-disk key/value hash map 1185 * 1186 * Access to this structure is assumed to happen in a thread safe manner, e.g. 1187 * by locking a mutex externally. The API can then call itself without having 1188 * to re-lock anything internally, thus is capable to work with a non-recursive mutex. 1189 * If desired, a recursive mutex can be used too though as needed. 1190 * 1191 * @tparam Item See ExampleItem 1192 * @tparam ItemRequest See ExampleReqestItem 1193 * @tparam fixedItemSize When this is true, all inserted items must have the same size. 1194 * This greatly simplifies and speeds up the task of managing free items within the buckets. 1195 * @tparam markForReferenceCounting Whether the data within the repository should be marked for reference-counting. 1196 * This costs a bit of performance, but must be enabled if there may be data in the 1197 * repository that does on-disk reference counting, like IndexedString, 1198 * IndexedIdentifier, etc. 1199 * @tparam Mutex The mutex type to use internally. It has to be locked externally before accessing the item repository 1200 * from multiple threads. 1201 */ 1202 1203 template <class Item, class ItemRequest, bool markForReferenceCounting = true, typename Mutex = QMutex, 1204 uint fixedItemSize = 0, unsigned int targetBucketHashSize = 524288 * 2> 1205 class ItemRepository : public AbstractItemRepository 1206 { 1207 using MyBucket = Bucket<Item, ItemRequest, markForReferenceCounting, fixedItemSize>; 1208 1209 enum { 1210 //Must be a multiple of Bucket::ObjectMapSize, so Bucket::hasClashingItem can be computed 1211 //Must also be a multiple of Bucket::NextBucketHashSize, for the same reason.(Currently those are same) 1212 bucketHashSize = (targetBucketHashSize / MyBucket::ObjectMapSize) * MyBucket::ObjectMapSize 1213 }; 1214 1215 enum { 1216 BucketStartOffset = sizeof(uint) * 7 + sizeof(short unsigned int) * bucketHashSize //Position in the data where the bucket array starts 1217 }; 1218 1219 Q_DISABLE_COPY_MOVE(ItemRepository) 1220 public: 1221 ///@param registry May be zero, then the repository will not be registered at all. Else, the repository will 1222 /// register itself to that registry. 1223 /// If this is zero, you have to care about storing the data using store() and/or close() by 1224 /// yourself. It does not happen automatically. For the global standard registry, the storing/loading 1225 /// is triggered from within duchain, so you don't need to care about it. 1226 explicit ItemRepository(const QString& repositoryName, Mutex* mutex, 1227 ItemRepositoryRegistry* registry = &globalItemRepositoryRegistry(), 1228 uint repositoryVersion = 1) 1229 : m_repositoryName(repositoryName) 1230 , m_repositoryVersion(repositoryVersion) 1231 , m_mutex(mutex) 1232 , m_registry(registry) 1233 { 1234 if (m_registry) 1235 m_registry->registerRepository(this); 1236 } 1237 1238 ~ItemRepository() override 1239 { 1240 if (m_registry) 1241 m_registry->unRegisterRepository(this); 1242 1243 close(); 1244 } 1245 1246 ///Unloading of buckets is enabled by default. Use this to disable it. When unloading is enabled, the data 1247 ///gotten from must only itemFromIndex must not be used for a long time. 1248 void setUnloadingEnabled(bool enabled) 1249 { 1250 m_unloadingEnabled = enabled; 1251 } 1252 1253 ///Returns the index for the given item. If the item is not in the repository yet, it is inserted. 1254 ///The index can never be zero. Zero is reserved for your own usage as invalid 1255 ///@param request Item to retrieve the index from 1256 unsigned int index(const ItemRequest& request) 1257 { 1258 const uint hash = request.hash(); 1259 const uint size = request.itemSize(); 1260 1261 // Bucket indexes tracked while walking the bucket chain for this request hash 1262 unsigned short bucketInChainWithSpace = 0; 1263 unsigned short lastBucketWalked = 0; 1264 1265 const ushort foundIndexInBucket = walkBucketChain(hash, [&](ushort bucketIdx, const MyBucket* bucketPtr) { 1266 lastBucketWalked = bucketIdx; 1267 1268 const ushort found = bucketPtr->findIndex(request); 1269 1270 if (!found && !bucketInChainWithSpace && bucketPtr->canAllocateItem(size)) { 1271 bucketInChainWithSpace = bucketIdx; 1272 } 1273 1274 return found; 1275 }); 1276 1277 if (foundIndexInBucket) { 1278 // 'request' is already present, return the existing index 1279 Q_ASSERT(m_currentBucket); 1280 Q_ASSERT(m_currentBucket < m_buckets.size()); 1281 return createIndex(lastBucketWalked, foundIndexInBucket); 1282 } 1283 1284 /* 1285 * Disclaimer: Writer of comment != writer of code, believe with caution 1286 * 1287 * Requested item does not yet exist. Need to find a place to put it... 1288 * 1289 * First choice is to place it in an existing bucket in the chain for the request hash 1290 * Second choice is to find an existing bucket anywhere with enough space 1291 * Otherwise use m_currentBucket (the latest unused bucket) 1292 * 1293 * If the chosen bucket fails to allocate the item, merge buckets to create a monster (dragon?) 1294 * 1295 * Finally, if the first option failed or the selected bucket failed to allocate, add the 1296 * bucket which the item ended up in to the chain of buckets for the request's hash 1297 */ 1298 1299 m_metaDataChanged = true; 1300 1301 const bool pickedBucketInChain = bucketInChainWithSpace; 1302 int useBucket = bucketInChainWithSpace; 1303 1304 if (!pickedBucketInChain) { 1305 //Try finding an existing bucket with deleted space to store the data into 1306 for (int a = 0; a < m_freeSpaceBuckets.size(); ++a) { 1307 MyBucket* bucketPtr = bucketForIndex(m_freeSpaceBuckets[a]); 1308 Q_ASSERT(bucketPtr->largestFreeSize()); 1309 1310 if (bucketPtr->canAllocateItem(size)) { 1311 //The item fits into the bucket. 1312 useBucket = m_freeSpaceBuckets[a]; 1313 break; 1314 } 1315 } 1316 } 1317 1318 auto advanceToNextBucket = [&]() { 1319 //This should never happen when we picked a bucket for re-use 1320 Q_ASSERT(!pickedBucketInChain); 1321 Q_ASSERT(useBucket == m_currentBucket); 1322 // note: the bucket may be empty and/or in the free list, but it might still be too small 1323 // for a monster bucket request 1324 1325 ++m_currentBucket; 1326 Q_ASSERT(m_currentBucket < ItemRepositoryBucketLimit); 1327 useBucket = m_currentBucket; 1328 }; 1329 1330 //The item isn't in the repository yet, find a new bucket for it 1331 while (1) { 1332 if (useBucket >= m_buckets.size()) { 1333 if (m_buckets.size() >= 0xfffe) { //We have reserved the last bucket index 0xffff for special purposes 1334 //the repository has overflown. 1335 qWarning() << "Found no room for an item in" << m_repositoryName << "size of the item:" << 1336 request.itemSize(); 1337 return 0; 1338 } else { 1339 allocateNextBuckets(ItemRepositoryBucketLinearGrowthFactor); 1340 } 1341 } 1342 1343 if (!useBucket) { 1344 Q_ASSERT(m_currentBucket); 1345 useBucket = m_currentBucket; 1346 } 1347 1348 // don't put an item into the tail of a monster bucket 1349 if (m_monsterBucketTailMarker[useBucket]) { 1350 advanceToNextBucket(); 1351 continue; 1352 } 1353 1354 auto* bucketPtr = bucketForIndex(useBucket); 1355 1356 ENSURE_REACHABLE(useBucket); 1357 Q_ASSERT_X(!bucketPtr->findIndex( 1358 request), Q_FUNC_INFO, 1359 "found item in unexpected bucket, ensure your ItemRequest::equals method is correct. Note: For custom AbstractType's e.g. ensure you have a proper equals() override"); 1360 1361 unsigned short indexInBucket = bucketPtr->index(request, size); 1362 1363 //If we could not allocate the item in an empty bucket, then we need to create a monster-bucket that 1364 //can hold the data. 1365 if (bucketPtr->isEmpty() && !indexInBucket) { 1366 ///@todo Move this compound statement into an own function 1367 uint totalSize = size + MyBucket::AdditionalSpacePerItem; 1368 1369 Q_ASSERT((totalSize > ItemRepositoryBucketSize)); 1370 1371 useBucket = 0; 1372 //The item did not fit in, we need a monster-bucket(Merge consecutive buckets) 1373 ///Step one: Search whether we can merge multiple empty buckets in the free-list into one monster-bucket 1374 int rangeStart = -1; 1375 int rangeEnd = -1; 1376 for (int a = 0; a < m_freeSpaceBuckets.size(); ++a) { 1377 MyBucket* bucketPtr = bucketForIndex(m_freeSpaceBuckets[a]); 1378 if (bucketPtr->isEmpty()) { 1379 //This bucket is a candidate for monster-bucket merging 1380 int index = ( int )m_freeSpaceBuckets[a]; 1381 if (rangeEnd != index) { 1382 rangeStart = index; 1383 rangeEnd = index + 1; 1384 } else { 1385 ++rangeEnd; 1386 } 1387 if (rangeStart != rangeEnd) { 1388 uint extent = rangeEnd - rangeStart - 1; 1389 uint totalAvailableSpace = bucketForIndex(rangeStart)->available() + 1390 MyBucket::DataSize* (rangeEnd - rangeStart - 1); 1391 if (totalAvailableSpace > totalSize) { 1392 Q_ASSERT(extent); 1393 ///We can merge these buckets into one monster-bucket that can hold the data 1394 Q_ASSERT((uint)m_freeSpaceBuckets[a - extent] == (uint)rangeStart); 1395 useBucket = rangeStart; 1396 convertMonsterBucket(rangeStart, extent); 1397 1398 break; 1399 } 1400 } 1401 } 1402 } 1403 1404 if (!useBucket) { 1405 //Create a new monster-bucket at the end of the data 1406 int needMonsterExtent = (totalSize - ItemRepositoryBucketSize) / MyBucket::DataSize + 1; 1407 Q_ASSERT(needMonsterExtent); 1408 const auto currentBucketIncrease = needMonsterExtent + 1; 1409 Q_ASSERT(m_currentBucket); 1410 if (m_currentBucket + currentBucketIncrease >= m_buckets.size()) { 1411 allocateNextBuckets(ItemRepositoryBucketLinearGrowthFactor + currentBucketIncrease); 1412 } 1413 useBucket = m_currentBucket; 1414 1415 convertMonsterBucket(useBucket, needMonsterExtent); 1416 m_currentBucket += currentBucketIncrease; 1417 Q_ASSERT(m_currentBucket < ItemRepositoryBucketLimit); 1418 Q_ASSERT(m_buckets[useBucket]); 1419 Q_ASSERT(m_buckets[useBucket]->monsterBucketExtent() == needMonsterExtent); 1420 } 1421 Q_ASSERT(useBucket); 1422 bucketPtr = bucketForIndex(useBucket); 1423 1424 indexInBucket = bucketPtr->index(request, size); 1425 Q_ASSERT(indexInBucket); 1426 } 1427 1428 Q_ASSERT(m_currentBucket); 1429 1430 if (indexInBucket) { 1431 ++m_statItemCount; 1432 1433 const int previousBucketNumber = lastBucketWalked; 1434 unsigned short* const bucketHashPosition = m_firstBucketForHash + (hash % bucketHashSize); 1435 1436 if (!(*bucketHashPosition)) { 1437 Q_ASSERT(!previousBucketNumber); 1438 (*bucketHashPosition) = useBucket; 1439 ENSURE_REACHABLE(useBucket); 1440 } else if (!pickedBucketInChain && previousBucketNumber && previousBucketNumber != useBucket) { 1441 //Should happen rarely 1442 ++m_statBucketHashClashes; 1443 1444 ///Debug: Detect infinite recursion 1445 ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, previousBucketNumber)); 1446 ) 1447 1448 //Find the position where the paths of useBucket and *bucketHashPosition intersect, and insert useBucket 1449 //there. That way, we don't create loops. 1450 QPair<unsigned int, unsigned int> intersect = hashChainIntersection(*bucketHashPosition, useBucket, 1451 hash); 1452 1453 Q_ASSERT(m_buckets[previousBucketNumber]->nextBucketForHash(hash) == 0); 1454 1455 if (!intersect.second) { 1456 ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(*bucketHashPosition, hash, useBucket)); ) 1457 ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(useBucket, hash, previousBucketNumber)); ) 1458 1459 Q_ASSERT(m_buckets[previousBucketNumber]->nextBucketForHash(hash) == 0); 1460 1461 m_buckets[previousBucketNumber]->setNextBucketForHash(hash, useBucket); 1462 ENSURE_REACHABLE(useBucket); 1463 ENSURE_REACHABLE(previousBucketNumber); 1464 1465 ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, useBucket)); ) 1466 } else if (intersect.first) { 1467 ifDebugInfiniteRecursion(Q_ASSERT(bucketForIndex(intersect.first)->nextBucketForHash(hash) == 1468 intersect.second); ) 1469 ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(*bucketHashPosition, hash, useBucket)); ) 1470 ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, intersect.second)); 1471 ) 1472 ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, intersect.first)); 1473 ) 1474 ifDebugInfiniteRecursion(Q_ASSERT(bucketForIndex(intersect.first)->nextBucketForHash(hash) == 1475 intersect.second); ) 1476 1477 ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(useBucket, hash, intersect.second)); ) 1478 ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(useBucket, hash, intersect.first)); ) 1479 1480 bucketForIndex(intersect.first)->setNextBucketForHash(hash, useBucket); 1481 1482 ENSURE_REACHABLE(useBucket); 1483 ENSURE_REACHABLE(intersect.second); 1484 1485 ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, useBucket)); ) 1486 ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(*bucketHashPosition, hash, intersect.second)); 1487 ) 1488 } else { 1489 //State: intersect.first == 0 && intersect.second != 0. This means that whole complete 1490 //chain opened by *bucketHashPosition with the hash-value is also following useBucket, 1491 //so useBucket can just be inserted at the top 1492 1493 ifDebugInfiniteRecursion(Q_ASSERT(!walkBucketLinks(*bucketHashPosition, hash, useBucket)); ) 1494 ifDebugInfiniteRecursion(Q_ASSERT(walkBucketLinks(useBucket, hash, *bucketHashPosition)); ) 1495 unsigned short oldStart = *bucketHashPosition; 1496 1497 *bucketHashPosition = useBucket; 1498 1499 ENSURE_REACHABLE(useBucket); 1500 ENSURE_REACHABLE(oldStart); 1501 Q_UNUSED(oldStart); 1502 } 1503 } 1504 1505 const auto reOrderFreeSpaceBucketIndex = m_freeSpaceBuckets.indexOf(useBucket); 1506 if (reOrderFreeSpaceBucketIndex != -1) 1507 updateFreeSpaceOrder(reOrderFreeSpaceBucketIndex); 1508 1509 Q_ASSERT(m_currentBucket < m_buckets.size()); 1510 return createIndex(useBucket, indexInBucket); 1511 } else { 1512 advanceToNextBucket(); 1513 } 1514 } 1515 //We haven't found a bucket that already contains the item, so append it to the current bucket 1516 1517 qWarning() << "Found no bucket for an item in" << m_repositoryName; 1518 return 0; 1519 } 1520 1521 ///Returns zero if the item is not in the repository yet 1522 unsigned int findIndex(const ItemRequest& request) const 1523 { 1524 return walkBucketChain(request.hash(), [this, &request](ushort bucketIdx, const MyBucket* bucketPtr) { 1525 const ushort indexInBucket = bucketPtr->findIndex(request); 1526 return indexInBucket ? createIndex(bucketIdx, indexInBucket) : 0u; 1527 }); 1528 } 1529 1530 /// Returns nullptr if the item is not in the repository yet 1531 const Item* findItem(const ItemRequest& request) const 1532 { 1533 auto index = findIndex(request); 1534 if (!index) { 1535 return nullptr; 1536 } 1537 return itemFromIndex(index); 1538 } 1539 1540 ///Deletes the item from the repository. 1541 void deleteItem(unsigned int index) 1542 { 1543 verifyIndex(index); 1544 1545 m_metaDataChanged = true; 1546 1547 const uint hash = itemFromIndex(index)->hash(); 1548 const ushort bucket = (index >> 16); 1549 1550 //Apart from removing the item itself, we may have to recreate the nextBucketForHash link, so we need the previous bucket 1551 MyBucket* previousBucketPtr = nullptr; 1552 MyBucket* const bucketPtr = walkBucketChain(hash, 1553 [bucket, &previousBucketPtr](ushort bucketIdx, 1554 MyBucket* bucketPtr) -> MyBucket* { 1555 if (bucket != bucketIdx) { 1556 previousBucketPtr = bucketPtr; 1557 return nullptr; 1558 } 1559 return bucketPtr; // found bucket, stop looking 1560 }); 1561 1562 //Make sure the index was reachable through the hash chain 1563 Q_ASSERT(bucketPtr); 1564 1565 --m_statItemCount; 1566 1567 bucketPtr->deleteItem(index, hash, *this); 1568 1569 /** 1570 * Now check whether the link root/previousBucketNumber -> bucket is still needed. 1571 */ 1572 1573 if (!previousBucketPtr) { 1574 // This bucket is linked in the m_firstBucketForHash array, find the next clashing bucket in the chain 1575 // There may be items in the chain that clash only with MyBucket::NextBucketHashSize, skipped here 1576 m_firstBucketForHash[hash % 1577 bucketHashSize] = walkBucketChain(hash, [hash](ushort bucketIdx, MyBucket* bucketPtr) { 1578 if (bucketPtr->hasClashingItem(hash, bucketHashSize)) { 1579 return bucketIdx; 1580 } 1581 return static_cast<ushort>(0); 1582 }); 1583 } else if (!bucketPtr->hasClashingItem(hash, MyBucket::NextBucketHashSize)) { 1584 // TODO: Skip clashing items reachable from m_firstBucketForHash 1585 // (see note in usePermissiveModuloWhenRemovingClashLinks() test) 1586 1587 ENSURE_REACHABLE(bucket); 1588 1589 previousBucketPtr->setNextBucketForHash(hash, bucketPtr->nextBucketForHash(hash)); 1590 1591 Q_ASSERT(m_buckets[bucketPtr->nextBucketForHash(hash)] != previousBucketPtr); 1592 } 1593 1594 ENSURE_REACHABLE(bucket); 1595 1596 if (bucketPtr->monsterBucketExtent()) { 1597 //Convert the monster-bucket back to multiple normal buckets, and put them into the free list 1598 Q_ASSERT(bucketPtr->isEmpty()); 1599 if (!previousBucketPtr) { 1600 // see https://bugs.kde.org/show_bug.cgi?id=272408 1601 // the monster bucket will be deleted and new smaller ones created 1602 // the next bucket for this hash is invalid anyways as done above 1603 // but calling the below unconditionally leads to other issues... 1604 bucketPtr->setNextBucketForHash(hash, 0); 1605 } 1606 convertMonsterBucket(bucket, 0); 1607 } else { 1608 putIntoFreeList(bucket, bucketPtr); 1609 } 1610 1611 Q_ASSERT(m_currentBucket); 1612 Q_ASSERT(m_currentBucket < m_buckets.size()); 1613 } 1614 1615 using MyDynamicItem = DynamicItem<Item, markForReferenceCounting>; 1616 1617 ///This returns an editable version of the item. @warning: Never change an entry that affects the hash, 1618 ///or the equals(..) function. That would completely destroy the consistency. 1619 ///@param index The index. It must be valid(match an existing item), and nonzero. 1620 ///@warning If you use this, make sure you lock mutex() before calling, 1621 /// and hold it until you're ready using/changing the data.. 1622 MyDynamicItem dynamicItemFromIndex(unsigned int index) 1623 { 1624 verifyIndex(index); 1625 1626 unsigned short bucket = (index >> 16); 1627 1628 auto* bucketPtr = bucketForIndex(bucket); 1629 bucketPtr->prepareChange(); 1630 unsigned short indexInBucket = index & 0xffff; 1631 return MyDynamicItem(const_cast<Item*>(bucketPtr->itemFromIndex(indexInBucket)), 1632 bucketPtr->data(), bucketPtr->dataSize()); 1633 } 1634 1635 ///This returns an editable version of the item. @warning: Never change an entry that affects the hash, 1636 ///or the equals(..) function. That would completely destroy the consistency. 1637 ///@param index The index. It must be valid(match an existing item), and nonzero. 1638 ///@warning If you use this, make sure you lock mutex() before calling, 1639 /// and hold it until you're ready using/changing the data.. 1640 ///@warning If you change contained complex items that depend on reference-counting, you 1641 /// must use dynamicItemFromIndex(..) instead of dynamicItemFromIndexSimple(..) 1642 Item* dynamicItemFromIndexSimple(unsigned int index) 1643 { 1644 verifyIndex(index); 1645 1646 unsigned short bucket = (index >> 16); 1647 1648 auto* bucketPtr = bucketForIndex(bucket); 1649 bucketPtr->prepareChange(); 1650 unsigned short indexInBucket = index & 0xffff; 1651 return const_cast<Item*>(bucketPtr->itemFromIndex(indexInBucket)); 1652 } 1653 1654 ///@param index The index. It must be valid(match an existing item), and nonzero. 1655 const Item* itemFromIndex(unsigned int index) const 1656 { 1657 verifyIndex(index); 1658 1659 unsigned short bucket = (index >> 16); 1660 1661 const auto* bucketPtr = bucketForIndex(bucket); 1662 unsigned short indexInBucket = index & 0xffff; 1663 Q_ASSERT(m_currentBucket); 1664 Q_ASSERT(m_currentBucket < m_buckets.size()); 1665 return bucketPtr->itemFromIndex(indexInBucket); 1666 } 1667 1668 QString printStatistics() const final 1669 { 1670 return statistics().print(); 1671 } 1672 1673 ItemRepositoryStatistics statistics() const 1674 { 1675 Q_ASSERT(!m_currentBucket || m_currentBucket < m_buckets.size()); 1676 ItemRepositoryStatistics ret; 1677 uint loadedBuckets = 0; 1678 for (auto* bucket : m_buckets) { 1679 if (bucket) { 1680 ++loadedBuckets; 1681 } 1682 } 1683 1684 #ifdef DEBUG_MONSTERBUCKETS 1685 for (int a = 0; a < m_freeSpaceBuckets.size(); ++a) { 1686 if (a > 0) { 1687 uint prev = a - 1; 1688 uint prevLargestFree = bucketForIndex(m_freeSpaceBuckets[prev])->largestFreeSize(); 1689 uint largestFree = bucketForIndex(m_freeSpaceBuckets[a])->largestFreeSize(); 1690 Q_ASSERT((prevLargestFree < largestFree) || (prevLargestFree == largestFree && 1691 m_freeSpaceBuckets[prev] < m_freeSpaceBuckets[a])); 1692 } 1693 } 1694 1695 #endif 1696 ret.hashSize = bucketHashSize; 1697 ret.hashUse = 0; 1698 for (uint a = 0; a < bucketHashSize; ++a) 1699 if (m_firstBucketForHash[a]) 1700 ++ret.hashUse; 1701 1702 ret.emptyBuckets = 0; 1703 1704 uint loadedMonsterBuckets = 0; 1705 for (auto* bucket : m_buckets) { 1706 if (bucket && bucket->monsterBucketExtent()) { 1707 loadedMonsterBuckets += bucket->monsterBucketExtent() + 1; 1708 } 1709 } 1710 1711 uint usedBucketSpace = MyBucket::DataSize* m_currentBucket; 1712 uint freeBucketSpace = 0, freeUnreachableSpace = 0; 1713 uint lostSpace = 0; //Should be zero, else something is wrong 1714 uint totalInBucketHashSize = 0, totalInBucketUsedSlotCount = 0, totalInBucketChainLengths = 0; 1715 uint bucketCount = 0; 1716 1717 ret.totalBucketFollowerSlots = 0; 1718 ret.averageNextBucketForHashSequenceLength = 0; 1719 ret.longestNextBucketChain = 0; 1720 ret.longestInBucketChain = 0; 1721 1722 for (int a = 1; a < m_currentBucket + 1; ++a) { 1723 Q_ASSERT(!m_monsterBucketTailMarker[a]); 1724 1725 MyBucket* bucket = bucketForIndex(a); 1726 if (bucket) { 1727 ++bucketCount; 1728 1729 bucket->countFollowerIndexLengths(totalInBucketUsedSlotCount, totalInBucketChainLengths, 1730 totalInBucketHashSize, ret.longestInBucketChain); 1731 1732 for (uint aa = 0; aa < MyBucket::NextBucketHashSize; ++aa) { 1733 uint length = 0; 1734 uint next = bucket->nextBucketForHash(aa); 1735 if (next) { 1736 ++ret.totalBucketFollowerSlots; 1737 while (next) { 1738 ++length; 1739 ++ret.averageNextBucketForHashSequenceLength; 1740 next = bucketForIndex(next)->nextBucketForHash(aa); 1741 } 1742 } 1743 if (length > ret.longestNextBucketChain) { 1744 // qDebug() << "next-bucket-chain in" << a << aa << ":" << length; 1745 ret.longestNextBucketChain = length; 1746 } 1747 } 1748 1749 uint bucketFreeSpace = bucket->totalFreeItemsSize() + bucket->available(); 1750 freeBucketSpace += bucketFreeSpace; 1751 if (m_freeSpaceBuckets.indexOf(a) == -1) 1752 freeUnreachableSpace += bucketFreeSpace; 1753 1754 if (bucket->isEmpty()) { 1755 ++ret.emptyBuckets; 1756 Q_ASSERT(!bucket->monsterBucketExtent()); 1757 #ifdef DEBUG_MONSTERBUCKETS 1758 Q_ASSERT(m_freeSpaceBuckets.contains(a)); 1759 #endif 1760 } 1761 1762 lostSpace += bucket->lostSpace(); 1763 a += bucket->monsterBucketExtent(); 1764 } 1765 } 1766 1767 if (ret.totalBucketFollowerSlots) 1768 ret.averageNextBucketForHashSequenceLength /= ret.totalBucketFollowerSlots; 1769 1770 ret.loadedBuckets = loadedBuckets; 1771 ret.currentBucket = m_currentBucket; 1772 ret.usedMemory = usedMemory(); 1773 ret.loadedMonsterBuckets = loadedMonsterBuckets; 1774 1775 ret.hashClashedItems = m_statBucketHashClashes; 1776 ret.totalItems = m_statItemCount; 1777 ret.usedSpaceForBuckets = usedBucketSpace; 1778 ret.freeSpaceInBuckets = freeBucketSpace; 1779 ret.lostSpace = lostSpace; 1780 1781 ret.freeUnreachableSpace = freeUnreachableSpace; 1782 ret.averageInBucketHashSize = bucketCount ? (totalInBucketHashSize / bucketCount) : 0; 1783 ret.averageInBucketUsedSlotCount = bucketCount ? (totalInBucketUsedSlotCount / bucketCount) : 0; 1784 ret.averageInBucketSlotChainLength = float( totalInBucketChainLengths ) / totalInBucketUsedSlotCount; 1785 1786 //If m_statBucketHashClashes is high, the bucket-hash needs to be bigger 1787 return ret; 1788 } 1789 1790 ///This can be used to safely iterate through all items in the repository 1791 ///@param visitor Should be an instance of a class that has a bool operator()(const Item*) member function, 1792 /// that returns whether more items are wanted. 1793 ///@param onlyInMemory If this is true, only items are visited that are currently in memory. 1794 template <class Visitor> 1795 void visitAllItems(Visitor& visitor, bool onlyInMemory = false) const 1796 { 1797 for (int a = 1; a <= m_currentBucket; ++a) { 1798 if (!onlyInMemory || m_buckets.at(a)) { 1799 auto bucket = bucketForIndex(a); 1800 if (bucket && !bucket->visitAllItems(visitor)) 1801 return; 1802 } 1803 } 1804 } 1805 1806 QString repositoryName() const final { return m_repositoryName; } 1807 1808 Mutex* mutex() const { return m_mutex; } 1809 1810 void lock() final { m_mutex->lock(); } 1811 void unlock() final { m_mutex->unlock(); } 1812 1813 private: 1814 void writeMetadata() 1815 { 1816 using namespace ItemRepositoryUtils; 1817 1818 Q_ASSERT(m_file); 1819 Q_ASSERT(m_dynamicFile); 1820 1821 m_file->seek(0); 1822 writeValue(m_file, m_repositoryVersion); 1823 uint hashSize = bucketHashSize; 1824 writeValue(m_file, hashSize); 1825 uint itemRepositoryVersion = staticItemRepositoryVersion(); 1826 writeValue(m_file, itemRepositoryVersion); 1827 writeValue(m_file, m_statBucketHashClashes); 1828 writeValue(m_file, m_statItemCount); 1829 1830 const uint bucketCount = static_cast<uint>(m_buckets.size()); 1831 writeValue(m_file, bucketCount); 1832 writeValue(m_file, m_currentBucket); 1833 writeValues(m_file, bucketHashSize, m_firstBucketForHash); 1834 Q_ASSERT(m_file->pos() == BucketStartOffset); 1835 1836 m_dynamicFile->seek(0); 1837 writeValue(m_dynamicFile, static_cast<uint>(m_freeSpaceBuckets.size())); 1838 writeList(m_dynamicFile, m_freeSpaceBuckets); 1839 1840 Q_ASSERT(m_buckets.size() == m_monsterBucketTailMarker.size()); 1841 writeList(m_dynamicFile, m_monsterBucketTailMarker); 1842 } 1843 1844 ///Synchronizes the state on disk to the one in memory, and does some memory-management. 1845 ///Should be called on a regular basis. Can be called centrally from the global item repository registry. 1846 void store() final 1847 { 1848 if (m_file) { 1849 if (!m_file->open(QFile::ReadWrite) || !m_dynamicFile->open(QFile::ReadWrite)) { 1850 qFatal("cannot re-open repository file for storing"); 1851 return; 1852 } 1853 1854 for (int a = 0; a < m_buckets.size(); ++a) { 1855 auto& bucket = m_buckets[a]; 1856 if (bucket) { 1857 if (bucket->changed()) { 1858 storeBucket(a); 1859 } 1860 if (m_unloadingEnabled) { 1861 const int unloadAfterTicks = 2; 1862 if (bucket->lastUsed() > unloadAfterTicks) { 1863 delete bucket; 1864 bucket = nullptr; 1865 } else { 1866 bucket->tick(); 1867 } 1868 } 1869 } 1870 } 1871 1872 if (m_metaDataChanged) { 1873 writeMetadata(); 1874 } 1875 //To protect us from inconsistency due to crashes. flush() is not enough. We need to close. 1876 m_file->close(); 1877 m_dynamicFile->close(); 1878 Q_ASSERT(!m_file->isOpen()); 1879 Q_ASSERT(!m_dynamicFile->isOpen()); 1880 } 1881 } 1882 1883 bool open(const QString& path) final 1884 { 1885 using namespace ItemRepositoryUtils; 1886 1887 close(); 1888 // qDebug() << "opening repository" << m_repositoryName << "at" << path; 1889 QDir dir(path); 1890 m_file = new QFile(dir.absoluteFilePath(m_repositoryName)); 1891 m_dynamicFile = new QFile(dir.absoluteFilePath(m_repositoryName + QLatin1String("_dynamic"))); 1892 if (!m_file->open(QFile::ReadWrite) || !m_dynamicFile->open(QFile::ReadWrite)) { 1893 delete m_file; 1894 m_file = nullptr; 1895 delete m_dynamicFile; 1896 m_dynamicFile = nullptr; 1897 return false; 1898 } 1899 1900 m_metaDataChanged = true; 1901 if (m_file->size() == 0) { 1902 m_statBucketHashClashes = m_statItemCount = 0; 1903 1904 Q_ASSERT(m_buckets.isEmpty()); 1905 Q_ASSERT(m_freeSpaceBuckets.isEmpty()); 1906 allocateNextBuckets(ItemRepositoryBucketLinearGrowthFactor); 1907 1908 std::fill_n(m_firstBucketForHash, bucketHashSize, 0); 1909 1910 // Skip the first bucket, we won't use it so we have the zero indices for special purposes 1911 Q_ASSERT(m_currentBucket == 1); 1912 1913 // -1 because the 0 bucket is reserved for special purposes 1914 Q_ASSERT(m_freeSpaceBuckets.size() == m_buckets.size() - 1); 1915 1916 writeMetadata(); 1917 1918 // We have completely initialized the file now 1919 if (m_file->pos() != BucketStartOffset) { 1920 KMessageBox::error(nullptr, 1921 i18n("Failed writing to %1, probably the disk is full", m_file->fileName())); 1922 abort(); 1923 } 1924 1925 } else { 1926 m_file->close(); 1927 bool res = m_file->open(QFile::ReadOnly); // Re-open in read-only mode, so we create a read-only m_fileMap 1928 VERIFY(res); 1929 // Check that the version is correct 1930 uint storedVersion = 0, hashSize = 0, itemRepositoryVersion = 0; 1931 1932 readValue(m_file, &storedVersion); 1933 readValue(m_file, &hashSize); 1934 readValue(m_file, &itemRepositoryVersion); 1935 readValue(m_file, &m_statBucketHashClashes); 1936 readValue(m_file, &m_statItemCount); 1937 1938 if (storedVersion != m_repositoryVersion || hashSize != bucketHashSize 1939 || itemRepositoryVersion != staticItemRepositoryVersion()) { 1940 qDebug() << "repository" << m_repositoryName << "version mismatch in" << m_file->fileName() 1941 << ", stored: version " << storedVersion << "hashsize" << hashSize << "repository-version" 1942 << itemRepositoryVersion << " current: version" << m_repositoryVersion << "hashsize" 1943 << bucketHashSize << "repository-version" << staticItemRepositoryVersion(); 1944 delete m_file; 1945 m_file = nullptr; 1946 delete m_dynamicFile; 1947 m_dynamicFile = nullptr; 1948 return false; 1949 } 1950 m_metaDataChanged = false; 1951 1952 uint bucketCount = 0; 1953 readValue(m_file, &bucketCount); 1954 1955 Q_ASSERT(m_buckets.isEmpty()); 1956 Q_ASSERT(m_freeSpaceBuckets.isEmpty()); 1957 // not calling allocateNextBuckets here, as we load buckets from file here, not new/next ones 1958 m_buckets.resize(bucketCount); 1959 1960 readValue(m_file, &m_currentBucket); 1961 Q_ASSERT(m_currentBucket); 1962 1963 readValues(m_file, bucketHashSize, m_firstBucketForHash); 1964 1965 Q_ASSERT(m_file->pos() == BucketStartOffset); 1966 1967 uint freeSpaceBucketsSize = 0; 1968 readValue(m_dynamicFile, &freeSpaceBucketsSize); 1969 m_freeSpaceBuckets.resize(freeSpaceBucketsSize); 1970 readList(m_dynamicFile, &m_freeSpaceBuckets); 1971 1972 m_monsterBucketTailMarker.resize(bucketCount); 1973 readList(m_dynamicFile, &m_monsterBucketTailMarker); 1974 } 1975 1976 m_fileMapSize = 0; 1977 m_fileMap = nullptr; 1978 1979 #ifdef ITEMREPOSITORY_USE_MMAP_LOADING 1980 if (m_file->size() > BucketStartOffset) { 1981 m_fileMap = m_file->map(BucketStartOffset, m_file->size() - BucketStartOffset); 1982 Q_ASSERT(m_file->isOpen()); 1983 Q_ASSERT(m_file->size() >= BucketStartOffset); 1984 if (m_fileMap) { 1985 m_fileMapSize = m_file->size() - BucketStartOffset; 1986 } else { 1987 qWarning() << "mapping" << m_file->fileName() << "FAILED!"; 1988 } 1989 } 1990 #endif 1991 // To protect us from inconsistency due to crashes. flush() is not enough. 1992 m_file->close(); 1993 m_dynamicFile->close(); 1994 1995 return true; 1996 } 1997 1998 ///@warning by default, this does not store the current state to disk. 1999 void close(bool doStore = false) final 2000 { 2001 if (doStore) 2002 store(); 2003 2004 if (m_file) 2005 m_file->close(); 2006 delete m_file; 2007 m_file = nullptr; 2008 m_fileMap = nullptr; 2009 m_fileMapSize = 0; 2010 2011 if (m_dynamicFile) 2012 m_dynamicFile->close(); 2013 delete m_dynamicFile; 2014 m_dynamicFile = nullptr; 2015 2016 qDeleteAll(m_buckets); 2017 m_buckets.clear(); 2018 2019 std::fill_n(m_firstBucketForHash, bucketHashSize, 0); 2020 } 2021 2022 int finalCleanup() final 2023 { 2024 int changed = 0; 2025 for (int a = 1; a <= m_currentBucket; ++a) { 2026 MyBucket* bucket = bucketForIndex(a); 2027 if (bucket && bucket->dirty()) { ///@todo Faster dirty check, without loading bucket 2028 changed += bucket->finalCleanup(*this); 2029 } 2030 a += bucket->monsterBucketExtent(); // Skip buckets that are attached as tail to monster-buckets 2031 } 2032 2033 return changed; 2034 } 2035 2036 uint usedMemory() const 2037 { 2038 uint used = 0; 2039 for (auto* bucket : m_buckets) { 2040 if (bucket) { 2041 used += bucket->usedMemory(); 2042 } 2043 } 2044 2045 return used; 2046 } 2047 2048 uint createIndex(ushort bucketIndex, ushort indexInBucket) const 2049 { 2050 //Combine the index in the bucket, and the bucket number into one index 2051 const uint index = (bucketIndex << 16) + indexInBucket; 2052 verifyIndex(index); 2053 return index; 2054 } 2055 2056 /** 2057 * Walks through all buckets clashing with @p hash 2058 * 2059 * Will return the value returned by the lambda, returning early if truthy 2060 */ 2061 template <typename Visitor> 2062 auto walkBucketChain(unsigned int hash, const Visitor& visitor) const->decltype(visitor(0, nullptr)) 2063 { 2064 unsigned short bucketIndex = m_firstBucketForHash[hash % bucketHashSize]; 2065 2066 while (bucketIndex) { 2067 auto* bucketPtr = bucketForIndex(bucketIndex); 2068 2069 if (auto visitResult = visitor(bucketIndex, bucketPtr)) { 2070 return visitResult; 2071 } 2072 2073 bucketIndex = bucketPtr->nextBucketForHash(hash); 2074 } 2075 2076 return {}; // clazy:exclude=returning-void-expression 2077 } 2078 2079 ///Makes sure the order within m_freeSpaceBuckets is correct, after largestFreeSize has been changed for m_freeSpaceBuckets[index]. 2080 ///If too few space is free within the given bucket, it is removed from m_freeSpaceBuckets. 2081 void updateFreeSpaceOrder(uint index) 2082 { 2083 m_metaDataChanged = true; 2084 2085 unsigned int* freeSpaceBuckets = m_freeSpaceBuckets.data(); 2086 2087 Q_ASSERT(index < static_cast<uint>(m_freeSpaceBuckets.size())); 2088 MyBucket* bucketPtr = bucketForIndex(freeSpaceBuckets[index]); 2089 2090 unsigned short largestFreeSize = bucketPtr->largestFreeSize(); 2091 2092 if (largestFreeSize == 0 || 2093 (bucketPtr->freeItemCount() <= MyBucket::MaxFreeItemsForHide && 2094 largestFreeSize <= MyBucket::MaxFreeSizeForHide)) { 2095 //Remove the item from freeSpaceBuckets 2096 m_freeSpaceBuckets.remove(index); 2097 } else { 2098 while (1) { 2099 int prev = index - 1; 2100 int next = index + 1; 2101 if (prev >= 0 && (bucketForIndex(freeSpaceBuckets[prev])->largestFreeSize() > largestFreeSize || 2102 (bucketForIndex(freeSpaceBuckets[prev])->largestFreeSize() == largestFreeSize && 2103 freeSpaceBuckets[index] < freeSpaceBuckets[prev])) 2104 ) { 2105 //This item should be behind the successor, either because it has a lower largestFreeSize, or because the index is lower 2106 uint oldPrevValue = freeSpaceBuckets[prev]; 2107 freeSpaceBuckets[prev] = freeSpaceBuckets[index]; 2108 freeSpaceBuckets[index] = oldPrevValue; 2109 index = prev; 2110 } else if (next < m_freeSpaceBuckets.size() 2111 && (bucketForIndex(freeSpaceBuckets[next])->largestFreeSize() < largestFreeSize 2112 || (bucketForIndex(freeSpaceBuckets[next])->largestFreeSize() == largestFreeSize 2113 && freeSpaceBuckets[index] > freeSpaceBuckets[next]))) { 2114 //This item should be behind the successor, either because it has a higher largestFreeSize, or because the index is higher 2115 uint oldNextValue = freeSpaceBuckets[next]; 2116 freeSpaceBuckets[next] = freeSpaceBuckets[index]; 2117 freeSpaceBuckets[index] = oldNextValue; 2118 index = next; 2119 } else { 2120 break; 2121 } 2122 } 2123 } 2124 } 2125 2126 ///Does conversion from monster-bucket to normal bucket and from normal bucket to monster-bucket 2127 ///The bucket @param bucketNumber must already be loaded and empty. the "extent" buckets behind must also be loaded, 2128 ///and also be empty. 2129 ///The created buckets are not registered anywhere. When converting from monster-bucket to normal bucket, 2130 ///oldExtent+1 normal buckets are created, that must be registered somewhere. 2131 ///@warning During conversion, all the touched buckets are deleted and re-created 2132 ///@param extent When this is zero, the bucket is converted from monster-bucket to normal bucket. 2133 /// When it is nonzero, it is converted to a monster-bucket. 2134 MyBucket* convertMonsterBucket(int bucketNumber, int extent) 2135 { 2136 m_metaDataChanged = true; 2137 2138 Q_ASSERT(bucketNumber); 2139 auto* bucketPtr = bucketForIndex(bucketNumber); 2140 2141 // the bucket may have encountered hash clashes in the past, we need to keep that data alive 2142 // note that all following buckets that got merged with the first bucket to create the monster 2143 // are guaranteed to _not_ have any next buckets, which is asserted in `deleteBucket` 2144 auto oldNextBucketHash = bucketPtr->takeNextBucketHash(); 2145 2146 if (extent) { 2147 //Convert to monster-bucket 2148 Q_ASSERT(bucketPtr->isEmpty()); 2149 2150 const auto monsterStart = bucketNumber; 2151 const auto monsterEnd = monsterStart + extent + 1; 2152 2153 // NOTE: see assertion below, when we get here then the entry for `bucketNumber in `m_freeSpaceBuckets` 2154 // will be followed by the entries for the following buckets, because they are all free and thus 2155 // the second level ordering by bucket index is all that matters 2156 const auto freeSpaceIndex = m_freeSpaceBuckets.indexOf(bucketNumber); 2157 Q_ASSERT(freeSpaceIndex != -1); 2158 2159 #ifdef DEBUG_MONSTERBUCKETS 2160 for (int offset = 0; offset < extent + 1; ++offset) { 2161 auto bucket = bucketForIndex(bucketNumber + offset); 2162 Q_ASSERT(bucket->isEmpty()); 2163 Q_ASSERT(!bucket->monsterBucketExtent()); 2164 Q_ASSERT(!m_monsterBucketTailMarker[bucketNumber + offset]); 2165 // verify that the m_freeSpaceBuckets is ordered the way we expect it to 2166 Q_ASSERT(m_freeSpaceBuckets[freeSpaceIndex + offset] == static_cast<uint>(bucketNumber + offset)); 2167 } 2168 #endif 2169 2170 // the following buckets are not free anymore, they get merged into the monster 2171 // NOTE: we assert above that the order of the entries is correct 2172 m_freeSpaceBuckets.remove(freeSpaceIndex, extent + 1); 2173 2174 for (int index = monsterStart; index < monsterEnd; ++index) 2175 deleteBucket(index); 2176 2177 bucketPtr = new MyBucket(); 2178 bucketPtr->initialize(extent, std::move(oldNextBucketHash)); 2179 2180 Q_ASSERT(!m_buckets[bucketNumber]); 2181 m_buckets[bucketNumber] = bucketPtr; 2182 2183 // mark the tail of the monster bucket 2184 std::fill(std::next(m_monsterBucketTailMarker.begin(), monsterStart + 1), 2185 std::next(m_monsterBucketTailMarker.begin(), monsterEnd), true); 2186 2187 #ifdef DEBUG_MONSTERBUCKETS 2188 // all following buckets are deleted and not marked as free anymore 2189 for (int index = monsterStart + 1; index < monsterEnd; ++index) { 2190 Q_ASSERT(!m_buckets[index]); 2191 Q_ASSERT(!m_freeSpaceBuckets.contains(index)); 2192 2193 // all tail buckets are marked as part of the monster now 2194 Q_ASSERT(m_monsterBucketTailMarker[index]); 2195 } 2196 #endif 2197 2198 // but the new monster bucket is still free 2199 Q_ASSERT(bucketPtr->isEmpty()); 2200 // and it itself is not marked as a tail, to allow us to still look things up in it directly 2201 Q_ASSERT(!m_monsterBucketTailMarker[bucketNumber]); 2202 // monster buckets don't get put into the m_freeSpaceBuckets list 2203 Q_ASSERT(!m_freeSpaceBuckets.contains(bucketNumber)); 2204 } else { 2205 const auto oldExtent = bucketPtr->monsterBucketExtent(); 2206 const auto oldMonsterStart = bucketNumber; 2207 const auto oldMonsterEnd = oldMonsterStart + oldExtent + 1; 2208 2209 Q_ASSERT(bucketPtr->monsterBucketExtent()); 2210 Q_ASSERT(bucketPtr->isEmpty()); 2211 // while empty, a monster bucket never is put into the m_freeSpaceBuckets list 2212 Q_ASSERT(!m_freeSpaceBuckets.contains(bucketNumber)); 2213 // the monster itself is not a tail 2214 Q_ASSERT(!m_monsterBucketTailMarker[bucketNumber]); 2215 2216 #ifdef DEBUG_MONSTERBUCKETS 2217 // all buckets that are part of the monster are marked as such 2218 for (int index = oldMonsterStart + 1; index < oldMonsterEnd; ++index) { 2219 Q_ASSERT(m_monsterBucketTailMarker[index]); 2220 } 2221 #endif 2222 2223 // delete the monster bucket 2224 deleteBucket(bucketNumber); 2225 2226 // remove markers for the tail of the monster bucket 2227 std::fill(std::next(m_monsterBucketTailMarker.begin(), oldMonsterStart + 1), 2228 std::next(m_monsterBucketTailMarker.begin(), oldMonsterEnd), false); 2229 2230 // recreate non-monster buckets 2231 for (int index = oldMonsterStart; index < oldMonsterEnd; ++index) { 2232 auto& bucket = m_buckets[index]; 2233 Q_ASSERT(!bucket); 2234 2235 bucket = new MyBucket(); 2236 2237 if (index == oldMonsterStart) { 2238 bucket->initialize(0, std::move(oldNextBucketHash)); 2239 bucketPtr = bucket; 2240 } else { 2241 bucket->initialize(0); 2242 } 2243 2244 Q_ASSERT(!bucket->monsterBucketExtent()); 2245 Q_ASSERT(!m_freeSpaceBuckets.contains(index)); 2246 2247 putIntoFreeList(index, bucket); 2248 2249 Q_ASSERT(m_freeSpaceBuckets.contains(index)); 2250 Q_ASSERT(!m_monsterBucketTailMarker[index]); 2251 } 2252 } 2253 2254 Q_ASSERT(bucketPtr == m_buckets[bucketNumber]); 2255 return bucketPtr; 2256 } 2257 2258 MyBucket* bucketForIndex(short unsigned int index) const 2259 { 2260 Q_ASSERT(index); 2261 MyBucket* bucketPtr = m_buckets.at(index); 2262 if (!bucketPtr) { 2263 bucketPtr = initializeBucket(index); 2264 } 2265 return bucketPtr; 2266 } 2267 2268 struct AllItemsReachableVisitor { 2269 explicit AllItemsReachableVisitor(ItemRepository* rep) 2270 : repository(rep) 2271 { 2272 } 2273 2274 bool operator()(const Item* item) { return repository->itemReachable(item); } 2275 2276 ItemRepository* repository; 2277 }; 2278 2279 // Returns whether the given item is reachable through its hash 2280 bool itemReachable(const Item* item) const 2281 { 2282 const uint hash = item->hash(); 2283 2284 return walkBucketChain(hash, [=](ushort /*bucketIndex*/, const MyBucket* bucketPtr) { 2285 return bucketPtr->itemReachable(item, hash); 2286 }); 2287 } 2288 2289 // Returns true if all items in the given bucket are reachable through their hashes 2290 bool allItemsReachable(unsigned short bucket) 2291 { 2292 if (!bucket) 2293 return true; 2294 2295 MyBucket* bucketPtr = bucketForIndex(bucket); 2296 2297 AllItemsReachableVisitor visitor(this); 2298 return bucketPtr->visitAllItems(visitor); 2299 } 2300 2301 inline MyBucket* initializeBucket(int bucketNumber) const 2302 { 2303 using namespace ItemRepositoryUtils; 2304 2305 Q_ASSERT(bucketNumber); 2306 #ifdef DEBUG_MONSTERBUCKETS 2307 // ensure that the previous N buckets are no monster buckets that overlap the requested bucket 2308 for (int offset = 1; offset < 5; ++offset) { 2309 int test = bucketNumber - offset; 2310 if (test >= 0 && m_buckets[test]) { 2311 Q_ASSERT(m_buckets[test]->monsterBucketExtent() < offset); 2312 } 2313 } 2314 2315 #endif 2316 2317 auto& bucket = m_buckets[bucketNumber]; 2318 if (!bucket) { 2319 bucket = new MyBucket(); 2320 2321 bool doMMapLoading = ( bool )m_fileMap; 2322 2323 uint offset = ((bucketNumber - 1) * MyBucket::DataSize); 2324 if (m_file && offset < m_fileMapSize && doMMapLoading && 2325 *reinterpret_cast<uint*>(m_fileMap + offset) == 0) { 2326 // qDebug() << "loading bucket mmap:" << bucketNumber; 2327 bucket->initializeFromMap(reinterpret_cast<char*>(m_fileMap + offset)); 2328 } else if (m_file) { 2329 //Either memory-mapping is disabled, or the item is not in the existing memory-map, 2330 //so we have to load it the classical way. 2331 bool res = m_file->open(QFile::ReadOnly); 2332 2333 if (offset + BucketStartOffset < m_file->size()) { 2334 VERIFY(res); 2335 offset += BucketStartOffset; 2336 m_file->seek(offset); 2337 int monsterBucketExtent; 2338 readValue(m_file, &monsterBucketExtent); 2339 m_file->seek(offset); 2340 ///FIXME: use the data here instead of copying it again in prepareChange 2341 QByteArray data = m_file->read((1 + monsterBucketExtent) * MyBucket::DataSize); 2342 bucket->initializeFromMap(data.data()); 2343 bucket->prepareChange(); 2344 } else { 2345 bucket->initialize(0); 2346 } 2347 2348 m_file->close(); 2349 } else { 2350 bucket->initialize(0); 2351 } 2352 } else { 2353 bucket->initialize(0); 2354 } 2355 2356 return bucket; 2357 } 2358 2359 ///Can only be called on empty buckets 2360 void deleteBucket(int bucketNumber) 2361 { 2362 // NOTE: use bucketForIndex in the assertions here, as the bucket may not be initialized 2363 // we still want to verify that we only delete empty buckets though 2364 Q_ASSERT(bucketForIndex(bucketNumber)->isEmpty()); 2365 Q_ASSERT(bucketForIndex(bucketNumber)->noNextBuckets()); 2366 Q_ASSERT(!m_freeSpaceBuckets.contains(bucketNumber)); 2367 2368 auto& bucket = m_buckets[bucketNumber]; 2369 delete bucket; 2370 bucket = nullptr; 2371 } 2372 2373 //m_file must be opened 2374 void storeBucket(int bucketNumber) const 2375 { 2376 if (!m_file) { 2377 return; 2378 } 2379 auto& bucket = m_buckets[bucketNumber]; 2380 if (bucket) { 2381 bucket->store(m_file, BucketStartOffset + (bucketNumber - 1) * MyBucket::DataSize); 2382 } 2383 } 2384 2385 /// If mustFindBucket is zero, the whole chain is just walked. This is good for debugging for infinite recursion. 2386 /// @return whether @p mustFindBucket was found 2387 bool walkBucketLinks(uint checkBucket, uint hash, uint mustFindBucket = 0) const 2388 { 2389 bool found = false; 2390 while (checkBucket) { 2391 if (checkBucket == mustFindBucket) 2392 found = true; 2393 2394 checkBucket = bucketForIndex(checkBucket)->nextBucketForHash(hash); 2395 } 2396 return found || (mustFindBucket == 0); 2397 } 2398 2399 /// Computes the bucket where the chains opened by the buckets @p mainHead and @p intersectorHead 2400 /// with hash @p hash meet each other. 2401 /// @return <predecessor of first shared bucket in mainHead, first shared bucket> 2402 QPair<unsigned int, unsigned int> hashChainIntersection(uint mainHead, uint intersectorHead, uint hash) const 2403 { 2404 uint previous = 0; 2405 uint current = mainHead; 2406 while (current) { 2407 ///@todo Make this more efficient 2408 if (walkBucketLinks(intersectorHead, hash, current)) 2409 return qMakePair(previous, current); 2410 2411 previous = current; 2412 current = bucketForIndex(current)->nextBucketForHash(hash); 2413 } 2414 return qMakePair(0u, 0u); 2415 } 2416 2417 void putIntoFreeList(unsigned short bucket, MyBucket* bucketPtr) 2418 { 2419 Q_ASSERT(bucket); 2420 2421 Q_ASSERT(!bucketPtr->monsterBucketExtent()); 2422 int indexInFree = m_freeSpaceBuckets.indexOf(bucket); 2423 2424 if (indexInFree == -1 && 2425 (bucketPtr->freeItemCount() >= MyBucket::MinFreeItemsForReuse || 2426 bucketPtr->largestFreeSize() >= MyBucket::MinFreeSizeForReuse)) { 2427 //Add the bucket to the list of buckets from where to re-assign free space 2428 //We only do it when a specific threshold of empty items is reached, because that way items can stay "somewhat" semantically ordered. 2429 Q_ASSERT(bucketPtr->largestFreeSize()); 2430 int insertPos; 2431 for (insertPos = 0; insertPos < m_freeSpaceBuckets.size(); ++insertPos) { 2432 if (bucketForIndex(m_freeSpaceBuckets[insertPos])->largestFreeSize() > bucketPtr->largestFreeSize()) 2433 break; 2434 } 2435 2436 m_freeSpaceBuckets.insert(insertPos, bucket); 2437 2438 updateFreeSpaceOrder(insertPos); 2439 } else if (indexInFree != -1) { 2440 ///Re-order so the order in m_freeSpaceBuckets is correct(sorted by largest free item size) 2441 updateFreeSpaceOrder(indexInFree); 2442 } 2443 2444 // empty buckets definitely should be in the free space list 2445 Q_ASSERT(!bucketPtr->isEmpty() || m_freeSpaceBuckets.contains(bucket)); 2446 } 2447 2448 void verifyIndex(uint index) const 2449 { 2450 // We don't use zero indices 2451 Q_ASSERT(index); 2452 int bucket = (index >> 16); 2453 // nor zero buckets 2454 Q_ASSERT(bucket); 2455 Q_ASSERT_X(bucket < m_buckets.size(), Q_FUNC_INFO, 2456 qPrintable(QStringLiteral("index %1 gives invalid bucket number %2, current count is: %3") 2457 .arg(index) 2458 .arg(bucket) 2459 .arg(m_buckets.size()))); 2460 2461 // don't trigger compile warnings in release mode 2462 Q_UNUSED(bucket); 2463 Q_UNUSED(index); 2464 } 2465 2466 void allocateNextBuckets(int numNewBuckets) 2467 { 2468 Q_ASSERT(numNewBuckets > 0); 2469 const auto oldSize = m_buckets.size(); 2470 m_buckets.resize(oldSize + numNewBuckets); 2471 m_monsterBucketTailMarker.resize(m_buckets.size()); 2472 2473 // the buckets we just created can by definition not yet exist and must be empty 2474 // meaning we can bypass loading from file _and_ we must add them to the freeSpaceBuckets too 2475 for (int i = oldSize; i < (oldSize + numNewBuckets); ++i) { 2476 if (i == 0) // see below, the zero bucket is reserved for special purposes 2477 continue; 2478 2479 auto& bucket = m_buckets[i]; 2480 Q_ASSERT(!bucket); 2481 2482 bucket = new MyBucket; 2483 bucket->initialize(0); 2484 2485 Q_ASSERT(bucket->isEmpty()); 2486 Q_ASSERT(!bucket->monsterBucketExtent()); 2487 Q_ASSERT(!m_monsterBucketTailMarker[i]); 2488 2489 putIntoFreeList(i, bucket); 2490 Q_ASSERT(m_freeSpaceBuckets.contains(i)); 2491 } 2492 2493 // Skip the first bucket, we won't use it so we have the zero indices for special purposes 2494 if (m_currentBucket == 0) 2495 m_currentBucket = 1; 2496 } 2497 2498 bool m_metaDataChanged = true; 2499 bool m_unloadingEnabled = true; 2500 // an unused, empty repo has no bucket yet 2501 // on first use this will then get incremented directly as we never use the zero-bucket index 2502 // that value is reserved for special purposes instead 2503 mutable int m_currentBucket = 0; 2504 2505 //List of buckets that have free space available that can be assigned. Sorted by size: Smallest space first. Second order sorting: Bucket index 2506 QVector<uint> m_freeSpaceBuckets; 2507 // every bucket that is part of a monster bucket but not the "main" bucket gets marked in this list, 2508 // this allows us to ensure we don't try to put an entry into such a bucket later on, which would corrupt data 2509 QVector<bool> m_monsterBucketTailMarker; 2510 //List of hash map buckets that actually hold the data of the item repository 2511 mutable QVector<MyBucket*> m_buckets; 2512 uint m_statBucketHashClashes = 0; 2513 uint m_statItemCount = 0; 2514 //Maps hash-values modulo 1<<bucketHashSizeBits to the first bucket such a hash-value appears in 2515 short unsigned int m_firstBucketForHash[bucketHashSize] = { 0 }; 2516 2517 //File that contains the buckets 2518 QFile* m_file = nullptr; 2519 uchar* m_fileMap = nullptr; 2520 uint m_fileMapSize = 0; 2521 //File that contains more dynamic data, like the list of buckets with deleted items 2522 QFile* m_dynamicFile = nullptr; 2523 2524 const QString m_repositoryName; 2525 const uint m_repositoryVersion; 2526 Mutex* const m_mutex; 2527 ItemRepositoryRegistry* const m_registry; 2528 2529 friend class ::TestItemRepository; 2530 friend class ::BenchItemRepository; 2531 }; 2532 2533 template<typename Context> 2534 class ItemRepositoryFor; 2535 2536 struct LockedItemRepository { 2537 template<typename Context, typename Op> 2538 static auto read(Op&& op) 2539 { 2540 const auto& repo = ItemRepositoryFor<Context>::repo(); 2541 2542 QMutexLocker lock(repo.mutex()); 2543 return op(repo); 2544 } 2545 2546 template<typename Context, typename Op> 2547 static auto write(Op&& op) 2548 { 2549 auto& repo = ItemRepositoryFor<Context>::repo(); 2550 2551 QMutexLocker lock(repo.mutex()); 2552 return op(repo); 2553 } 2554 2555 template<typename Context> 2556 static void initialize() 2557 { 2558 ItemRepositoryFor<Context>::repo(); 2559 } 2560 }; 2561 } 2562 2563 #endif