File indexing completed on 2024-05-19 04:39:27
0001 #include "itemrepositorytestbase.h" 0002 0003 #include <QTest> 0004 0005 // enable various debug facilities in the ItemRepository code 0006 // make sure to keep this block on the top, to ensure the include of itemrepository.h 0007 // below picks it up correctly 0008 0009 #define DEBUG_MONSTERBUCKETS 1 0010 #define DEBUG_ITEMREPOSITORY_LOADING 1 0011 #define DEBUG_ITEM_REACHABILITY 1 0012 #define DEBUG_INCORRECT_DELETE 1 0013 0014 #include <serialization/indexedstring.h> 0015 #include <serialization/itemrepository.h> 0016 0017 #include <cstdlib> 0018 #include <ctime> 0019 0020 #include <algorithm> 0021 #include <memory> 0022 #include <numeric> 0023 #include <utility> 0024 #include <vector> 0025 0026 using namespace KDevelop; 0027 0028 struct TestItem 0029 { 0030 TestItem(uint hash, uint dataSize) : m_hash(hash) 0031 , m_dataSize(dataSize) 0032 { 0033 } 0034 0035 TestItem& operator=(const TestItem& rhs) = delete; 0036 0037 //Every item has to implement this function, and return a valid hash. 0038 //Must be exactly the same hash value as ExampleItemRequest::hash() has returned while creating the item. 0039 unsigned int hash() const 0040 { 0041 return m_hash; 0042 } 0043 0044 //Every item has to implement this function, and return the complete size this item takes in memory. 0045 //Must be exactly the same value as ExampleItemRequest::itemSize() has returned while creating the item. 0046 unsigned int itemSize() const 0047 { 0048 return sizeof(TestItem) + m_dataSize; 0049 } 0050 0051 bool equals(const TestItem* rhs) const 0052 { 0053 return rhs->m_hash == m_hash 0054 && itemSize() == rhs->itemSize() 0055 && memcmp(reinterpret_cast<const char*>(this), rhs, itemSize()) == 0; 0056 } 0057 0058 uint m_hash; 0059 uint m_dataSize; 0060 }; 0061 0062 struct TestItemRequest 0063 { 0064 TestItem& m_item; 0065 bool m_compareData; 0066 0067 TestItemRequest(TestItem& item, bool compareData = false) 0068 : m_item(item) 0069 , m_compareData(compareData) 0070 { 0071 } 0072 enum { 0073 AverageSize = 700 //This should be the approximate average size of an Item 0074 }; 0075 0076 uint hash() const 0077 { 0078 return m_item.hash(); 0079 } 0080 0081 //Should return the size of an item created with createItem 0082 uint itemSize() const 0083 { 0084 return m_item.itemSize(); 0085 } 0086 0087 void createItem(TestItem* item) const 0088 { 0089 memcpy(reinterpret_cast<void*>(item), &m_item, m_item.itemSize()); 0090 } 0091 0092 static void destroy(TestItem* /*item*/, AbstractItemRepository&) 0093 { 0094 //Nothing to do 0095 } 0096 0097 static bool persistent(const TestItem* /*item*/) 0098 { 0099 return true; 0100 } 0101 0102 //Should return whether the here requested item equals the given item 0103 bool equals(const TestItem* item) const 0104 { 0105 return hash() == item->hash() && (!m_compareData || m_item.equals(item)); 0106 } 0107 }; 0108 0109 uint smallItemsFraction = 20; //Fraction of items between 0 and 1 kb 0110 uint largeItemsFraction = 1; //Fraction of items between 0 and 200 kb 0111 uint cycles = 10000; 0112 uint deletionProbability = 1; //Percentual probability that a checked item is deleted. Per-cycle probability must be multiplied with checksPerCycle. 0113 uint checksPerCycle = 10; 0114 0115 TestItem* createItem(uint id, uint size) 0116 { 0117 TestItem* ret; 0118 char* data = new char[size]; 0119 uint dataSize = size - sizeof(TestItem); 0120 ret = new (data) TestItem(id, dataSize); 0121 0122 //Fill in same random pattern 0123 for (uint a = 0; a < dataSize; ++a) 0124 data[sizeof(TestItem) + a] = ( char )(a + id); 0125 0126 return ret; 0127 } 0128 0129 ///@todo Add a test where the complete content is deleted again, and make sure the result has a nice structure 0130 ///@todo More consistency and lost-space tests, especially about monster-buckets. Make sure their space is re-claimed 0131 class TestItemRepository 0132 : public ItemRepositoryTestBase 0133 { 0134 Q_OBJECT 0135 0136 private Q_SLOTS: 0137 void testItemRepository() 0138 { 0139 QMutex mutex; 0140 ItemRepository<TestItem, TestItemRequest> repository(QStringLiteral("TestItemRepository"), &mutex); 0141 0142 // calling statistics here does some extended tests and shouldn't crash or assert 0143 repository.statistics(); 0144 0145 uint itemId = 0; 0146 QHash<uint, TestItem*> realItemsByIndex; 0147 QHash<uint, TestItem*> realItemsById; 0148 uint totalInsertions = 0, totalDeletions = 0; 0149 uint maxSize = 0; 0150 uint totalSize = 0; 0151 srand(12345); 0152 uint highestSeenIndex = 0; 0153 0154 for (uint a = 0; a < cycles; ++a) { 0155 { 0156 //Insert an item 0157 uint itemDecision = rand() % (smallItemsFraction + largeItemsFraction); 0158 uint itemSize; 0159 if (itemDecision < largeItemsFraction) { 0160 //Create a large item: Up to 200kb 0161 itemSize = (rand() % 200000) + sizeof(TestItem); 0162 } else 0163 itemSize = (rand() % 1000) + sizeof(TestItem); 0164 TestItem* item = createItem(++itemId, itemSize); 0165 Q_ASSERT(item->hash() == itemId); 0166 QVERIFY(item->equals(item)); 0167 uint index = repository.index(TestItemRequest(*item)); 0168 if (index > highestSeenIndex) 0169 highestSeenIndex = index; 0170 Q_ASSERT(index); 0171 realItemsByIndex.insert(index, item); 0172 realItemsById.insert(itemId, item); 0173 ++totalInsertions; 0174 totalSize += itemSize; 0175 if (itemSize > maxSize) 0176 maxSize = itemSize; 0177 } 0178 0179 for (uint a = 0; a < checksPerCycle; ++a) { 0180 //Check an item 0181 uint pick = rand() % itemId; 0182 if (realItemsById.contains(pick)) { 0183 uint index = repository.findIndex(*realItemsById[pick]); 0184 QVERIFY(index); 0185 QVERIFY(realItemsByIndex.contains(index)); 0186 QVERIFY(realItemsByIndex[index]->equals(repository.itemFromIndex(index))); 0187 0188 if (( uint ) (rand() % 100) < deletionProbability) { 0189 ++totalDeletions; 0190 //Delete the item 0191 repository.deleteItem(index); 0192 QVERIFY(!repository.findIndex(*realItemsById[pick])); 0193 uint newIndex = repository.index(*realItemsById[pick]); 0194 QVERIFY(newIndex); 0195 QVERIFY(realItemsByIndex[index]->equals(repository.itemFromIndex(newIndex))); 0196 0197 #ifdef POSITION_TEST 0198 //Since we have previously deleted the item, there must be enough space 0199 if (!((newIndex >> 16) <= (highestSeenIndex >> 16))) { 0200 qDebug() << "size:" << realItemsById[pick]->itemSize(); 0201 qDebug() << "previous highest seen bucket:" << (highestSeenIndex >> 16); 0202 qDebug() << "new bucket:" << (newIndex >> 16); 0203 } 0204 QVERIFY((newIndex >> 16) <= (highestSeenIndex >> 16)); 0205 #endif 0206 repository.deleteItem(newIndex); 0207 QVERIFY(!repository.findIndex(*realItemsById[pick])); 0208 delete[] realItemsById[pick]; 0209 realItemsById.remove(pick); 0210 realItemsByIndex.remove(index); 0211 } 0212 } 0213 } 0214 0215 // calling statistics here does some extended tests and shouldn't crash or assert 0216 repository.statistics(); 0217 } 0218 0219 // cleanup 0220 { 0221 for (auto it = realItemsByIndex.constBegin(); it != realItemsByIndex.constEnd(); ++it) { 0222 repository.deleteItem(it.key()); 0223 delete[] it.value(); 0224 } 0225 0226 realItemsById.clear(); 0227 realItemsByIndex.clear(); 0228 } 0229 0230 qDebug() << "total insertions:" << totalInsertions << "total deletions:" << totalDeletions << 0231 "average item size:" << (totalSize / totalInsertions) << "biggest item size:" << maxSize; 0232 0233 const auto stats = repository.statistics(); 0234 qDebug() << stats; 0235 QVERIFY(stats.freeUnreachableSpace < stats.freeSpaceInBuckets / 100); // < 1% of the free space is unreachable 0236 QVERIFY(stats.freeSpaceInBuckets < stats.usedSpaceForBuckets); // < 20% free space 0237 } 0238 void testLeaks() 0239 { 0240 srand(12345); 0241 QMutex mutex; 0242 ItemRepository<TestItem, TestItemRequest> repository(QStringLiteral("TestItemRepository"), &mutex); 0243 QList<TestItem*> items; 0244 for (int i = 0; i < 10000; ++i) { 0245 TestItem* item = createItem(i, (rand() % 1000) + sizeof(TestItem)); 0246 items << item; 0247 repository.index(TestItemRequest(*item)); 0248 } 0249 0250 for (auto item : qAsConst(items)) { 0251 delete[] item; 0252 } 0253 } 0254 void testStringSharing() 0255 { 0256 QString qString; 0257 qString.fill('.', 1000); 0258 IndexedString indexedString(qString); 0259 const int repeat = 10000; 0260 QVector<QString> strings; 0261 strings.resize(repeat); 0262 for (int i = 0; i < repeat; ++i) { 0263 strings[i] = indexedString.str(); 0264 QCOMPARE(qString, strings[i]); 0265 } 0266 } 0267 void deleteClashingMonsterBucket_data() 0268 { 0269 QTest::addColumn<QVector<uint>>("itemSizes"); 0270 int testIndex = 0; 0271 0272 auto addRow = [&testIndex](QVector<uint> itemSizes) { 0273 QTest::addRow("%d", testIndex++) << itemSizes; 0274 }; 0275 0276 const auto big = ItemRepositoryBucketSize + 10; 0277 const auto small = 20; 0278 addRow({small, big}); 0279 addRow({small, small + 1}); 0280 addRow({big, big + 1}); 0281 addRow({small, small + 1, small + 2}); 0282 addRow({small, small + 1, big}); 0283 addRow({small, big, big + 1}); 0284 addRow({big, big + 1, big + 2}); 0285 } 0286 void deleteClashingMonsterBucket() 0287 { 0288 QFETCH(QVector<uint>, itemSizes); 0289 0290 std::vector<std::size_t> originalItemIds(itemSizes.size()); 0291 std::iota(originalItemIds.begin(), originalItemIds.end(), 0); 0292 0293 QMutex mutex; 0294 ItemRepository<TestItem, TestItemRequest> repository(QStringLiteral("TestItemRepository"), &mutex); 0295 0296 // repeat the below for multiple hashes 0297 for (auto hash : {1235u, 1236u}) { 0298 // one permutation for the insertion order 0299 auto itemIdsToCreate = originalItemIds; 0300 do { 0301 // one permutation for the deletion order 0302 auto itemIdsToDelete = originalItemIds; 0303 do { 0304 std::vector<uint> repoIndices; 0305 repoIndices.reserve(itemSizes.size()); 0306 std::vector<std::unique_ptr<TestItem[]>> items; 0307 items.reserve(itemSizes.size()); 0308 0309 // create items 0310 for (auto itemId : itemIdsToCreate) { 0311 std::unique_ptr<TestItem[]> item(createItem(hash, itemSizes[itemId])); 0312 const auto itemRequest = TestItemRequest(*item.get(), true); 0313 0314 QVERIFY(!repository.findIndex(itemRequest)); 0315 0316 for (const auto& otherItem : items) 0317 QVERIFY(!otherItem.get()->equals(item.get())); 0318 0319 auto index = repository.index(itemRequest); 0320 QVERIFY(index); 0321 QCOMPARE(repository.findIndex(itemRequest), index); 0322 0323 items.push_back(std::move(item)); 0324 repoIndices.push_back(index); 0325 } 0326 0327 // delete items 0328 for (auto itemId : itemIdsToDelete) { 0329 const auto& item = items[itemId]; 0330 const auto itemRequest = TestItemRequest(*item.get(), true); 0331 const auto index = repoIndices[itemId]; 0332 0333 QCOMPARE(repository.findIndex(itemRequest), index); 0334 repository.deleteItem(index); 0335 QCOMPARE(repository.findIndex(itemRequest), 0); 0336 } 0337 } while (std::next_permutation(itemIdsToDelete.begin(), itemIdsToDelete.end())); 0338 } while (std::next_permutation(itemIdsToCreate.begin(), itemIdsToCreate.end())); 0339 } 0340 } 0341 void usePermissiveModuloWhenRemovingClashLinks() 0342 { 0343 QMutex mutex; 0344 ItemRepository<TestItem, TestItemRequest> repository(QStringLiteral("PermissiveModulo"), &mutex); 0345 0346 const uint bucketHashSize = decltype(repository)::bucketHashSize; 0347 const uint nextBucketHashSize = decltype(repository)::MyBucket::NextBucketHashSize; 0348 auto bucketNumberForIndex = [](const uint index) { 0349 return index >> 16; 0350 }; 0351 0352 const uint clashValue = 2; 0353 0354 // Choose sizes that ensure that the items fit in the desired buckets 0355 const uint bigItemSize = ItemRepositoryBucketSize * 0.55 - 1; 0356 const uint smallItemSize = ItemRepositoryBucketSize * 0.25 - 1; 0357 0358 // Will get placed in bucket 1 (bucket zero is invalid), so the root bucket table at position 'clashValue' will be '1' 0359 const QScopedArrayPointer<TestItem> firstChainFirstLink(createItem(clashValue, bigItemSize)); 0360 const uint firstChainFirstLinkIndex = repository.index(*firstChainFirstLink); 0361 QCOMPARE(bucketNumberForIndex(firstChainFirstLinkIndex), 1u); 0362 0363 // Will also get placed in bucket 1, so root bucket table at position 'nextBucketHashSize + clashValue' will be '1' 0364 const QScopedArrayPointer<TestItem> secondChainFirstLink(createItem(nextBucketHashSize + clashValue, 0365 smallItemSize)); 0366 const uint secondChainFirstLinkIndex = repository.index(*secondChainFirstLink); 0367 QCOMPARE(bucketNumberForIndex(secondChainFirstLinkIndex), 1u); 0368 0369 // Will get placed in bucket 2, so bucket 1's next hash table at position 'clashValue' will be '2' 0370 const QScopedArrayPointer<TestItem> firstChainSecondLink(createItem(bucketHashSize + clashValue, bigItemSize)); 0371 const uint firstChainSecondLinkIndex = repository.index(*firstChainSecondLink); 0372 QCOMPARE(bucketNumberForIndex(firstChainSecondLinkIndex), 2u); 0373 0374 // Will also get placed in bucket 2, reachable since bucket 1's next hash table at position 'clashValue' is '2' 0375 const QScopedArrayPointer<TestItem> secondChainSecondLink(createItem( 0376 bucketHashSize + nextBucketHashSize + clashValue, smallItemSize)); 0377 const uint secondChainSecondLinkIndex = repository.index(*secondChainSecondLink); 0378 QCOMPARE(bucketNumberForIndex(secondChainSecondLinkIndex), 2u); 0379 0380 /* 0381 * At this point we have two chains in the repository, rooted at 'clashValue' and 'nextBucketHashSize + clashValue' 0382 * Both of the chains start in bucket 1 and end in bucket 2, but both chains share the same link to bucket 2 0383 * This is because two of the hashes clash the other two when % bucketHashSize, but all of them clash % nextBucketHashSize 0384 */ 0385 0386 repository.deleteItem(firstChainSecondLinkIndex); 0387 0388 /* 0389 * Now we've deleted the second item in the first chain, this means the first chain no longer requires a link to the 0390 * second bucket where that item was... but the link must remain, since it's shared (clashed) by the second chain. 0391 * 0392 * When cutting a link out of the middle of the chain, we need to check if its items clash using the "permissive" 0393 * modulus (the size of the /next/ buckets map), which is always a factor of the "stricter" modulus (the size of the 0394 * /root/ buckets map). 0395 * 0396 * This behavior implies that there will sometimes be useless buckets in the bucket chain for a given hash, so when 0397 * cutting out the root link, it's safe to skip over them to the first clash with the 'stricter' modulus. 0398 */ 0399 0400 // The second item of the second chain must still be reachable 0401 QCOMPARE(repository.findIndex(*secondChainSecondLink), secondChainSecondLinkIndex); 0402 0403 /* 0404 * As a memo to anyone who's still reading this, this also means the following situation can exist: 0405 * 0406 * bucketHashSize == 8 0407 * nextBucketHashSize == 4 0408 * U is a link table 0409 * B is a bucket 0410 * [...] are the hashes of the contained items 0411 * 0412 * U 0413 * U 0414 * U -> B1 0415 * U 0416 * U 0417 * U 0418 * U -> B2 0419 * U 0420 * 0421 * B0 (Invalid) 0422 * B1 -> [2, 6] 0423 * U 0424 * U 0425 * U -> B3 0426 * U 0427 * B2 -> [14] 0428 * U 0429 * U 0430 * U -> B1 0431 * U 0432 * B3 -> [10] 0433 * U 0434 * U 0435 * U 0436 * U 0437 * 0438 * The chain for hash 6 is: 0439 * Root[6] -> B2[2] -> B1[2] -> B3 0440 * 0441 * If you remove the item with hash 6, 6 and 2 will clash with mod 4 (permissive) 0442 * 0443 * So the useless link `B2[2] -> B1` will be preserved, even though its useless 0444 * as the item with hash 2 is reachable directly from the root. 0445 * 0446 * So TODO: Don't preserve links to items accessible from root buckets. This cannot 0447 * be done correctly using only Bucket::hasClashingItem as of now. 0448 */ 0449 } 0450 }; 0451 0452 #include "test_itemrepository.moc" 0453 0454 QTEST_MAIN(TestItemRepository)