File indexing completed on 2024-05-19 04:39:27

0001 #include "itemrepositorytestbase.h"
0002 
0003 #include <QTest>
0004 
0005 // enable various debug facilities in the ItemRepository code
0006 // make sure to keep this block on the top, to ensure the include of itemrepository.h
0007 // below picks it up correctly
0008 
0009 #define DEBUG_MONSTERBUCKETS 1
0010 #define DEBUG_ITEMREPOSITORY_LOADING 1
0011 #define DEBUG_ITEM_REACHABILITY 1
0012 #define DEBUG_INCORRECT_DELETE 1
0013 
0014 #include <serialization/indexedstring.h>
0015 #include <serialization/itemrepository.h>
0016 
0017 #include <cstdlib>
0018 #include <ctime>
0019 
0020 #include <algorithm>
0021 #include <memory>
0022 #include <numeric>
0023 #include <utility>
0024 #include <vector>
0025 
0026 using namespace KDevelop;
0027 
0028 struct TestItem
0029 {
0030     TestItem(uint hash, uint dataSize) : m_hash(hash)
0031         , m_dataSize(dataSize)
0032     {
0033     }
0034 
0035     TestItem& operator=(const TestItem& rhs) = delete;
0036 
0037     //Every item has to implement this function, and return a valid hash.
0038     //Must be exactly the same hash value as ExampleItemRequest::hash() has returned while creating the item.
0039     unsigned int hash() const
0040     {
0041         return m_hash;
0042     }
0043 
0044     //Every item has to implement this function, and return the complete size this item takes in memory.
0045     //Must be exactly the same value as ExampleItemRequest::itemSize() has returned while creating the item.
0046     unsigned int itemSize() const
0047     {
0048         return sizeof(TestItem) + m_dataSize;
0049     }
0050 
0051     bool equals(const TestItem* rhs) const
0052     {
0053         return rhs->m_hash == m_hash
0054                && itemSize() == rhs->itemSize()
0055                && memcmp(reinterpret_cast<const char*>(this), rhs, itemSize()) == 0;
0056     }
0057 
0058     uint m_hash;
0059     uint m_dataSize;
0060 };
0061 
0062 struct TestItemRequest
0063 {
0064     TestItem& m_item;
0065     bool m_compareData;
0066 
0067     TestItemRequest(TestItem& item, bool compareData = false)
0068         : m_item(item)
0069         , m_compareData(compareData)
0070     {
0071     }
0072     enum {
0073         AverageSize = 700 //This should be the approximate average size of an Item
0074     };
0075 
0076     uint hash() const
0077     {
0078         return m_item.hash();
0079     }
0080 
0081     //Should return the size of an item created with createItem
0082     uint itemSize() const
0083     {
0084         return m_item.itemSize();
0085     }
0086 
0087     void createItem(TestItem* item) const
0088     {
0089         memcpy(reinterpret_cast<void*>(item), &m_item, m_item.itemSize());
0090     }
0091 
0092     static void destroy(TestItem* /*item*/, AbstractItemRepository&)
0093     {
0094         //Nothing to do
0095     }
0096 
0097     static bool persistent(const TestItem* /*item*/)
0098     {
0099         return true;
0100     }
0101 
0102     //Should return whether the here requested item equals the given item
0103     bool equals(const TestItem* item) const
0104     {
0105         return hash() == item->hash() && (!m_compareData || m_item.equals(item));
0106     }
0107 };
0108 
0109 uint smallItemsFraction = 20; //Fraction of items between 0 and 1 kb
0110 uint largeItemsFraction = 1; //Fraction of items between 0 and 200 kb
0111 uint cycles = 10000;
0112 uint deletionProbability = 1; //Percentual probability that a checked item is deleted. Per-cycle probability must be multiplied with checksPerCycle.
0113 uint checksPerCycle = 10;
0114 
0115 TestItem* createItem(uint id, uint size)
0116 {
0117     TestItem* ret;
0118     char* data = new char[size];
0119     uint dataSize = size - sizeof(TestItem);
0120     ret = new (data) TestItem(id, dataSize);
0121 
0122     //Fill in same random pattern
0123     for (uint a = 0; a < dataSize; ++a)
0124         data[sizeof(TestItem) + a] = ( char )(a + id);
0125 
0126     return ret;
0127 }
0128 
0129 ///@todo Add a test where the complete content is deleted again, and make sure the result has a nice structure
0130 ///@todo More consistency and lost-space tests, especially about monster-buckets. Make sure their space is re-claimed
0131 class TestItemRepository
0132     : public ItemRepositoryTestBase
0133 {
0134     Q_OBJECT
0135 
0136 private Q_SLOTS:
0137     void testItemRepository()
0138     {
0139         QMutex mutex;
0140         ItemRepository<TestItem, TestItemRequest> repository(QStringLiteral("TestItemRepository"), &mutex);
0141 
0142         // calling statistics here does some extended tests and shouldn't crash or assert
0143         repository.statistics();
0144 
0145         uint itemId = 0;
0146         QHash<uint, TestItem*> realItemsByIndex;
0147         QHash<uint, TestItem*> realItemsById;
0148         uint totalInsertions = 0, totalDeletions = 0;
0149         uint maxSize = 0;
0150         uint totalSize = 0;
0151         srand(12345);
0152         uint highestSeenIndex = 0;
0153 
0154         for (uint a = 0; a < cycles; ++a) {
0155             {
0156                 //Insert an item
0157                 uint itemDecision = rand() % (smallItemsFraction + largeItemsFraction);
0158                 uint itemSize;
0159                 if (itemDecision < largeItemsFraction) {
0160                     //Create a large item: Up to 200kb
0161                     itemSize = (rand() % 200000) + sizeof(TestItem);
0162                 } else
0163                     itemSize = (rand() % 1000) + sizeof(TestItem);
0164                 TestItem* item = createItem(++itemId, itemSize);
0165                 Q_ASSERT(item->hash() == itemId);
0166                 QVERIFY(item->equals(item));
0167                 uint index = repository.index(TestItemRequest(*item));
0168                 if (index > highestSeenIndex)
0169                     highestSeenIndex = index;
0170                 Q_ASSERT(index);
0171                 realItemsByIndex.insert(index, item);
0172                 realItemsById.insert(itemId, item);
0173                 ++totalInsertions;
0174                 totalSize += itemSize;
0175                 if (itemSize > maxSize)
0176                     maxSize = itemSize;
0177             }
0178 
0179             for (uint a = 0; a < checksPerCycle; ++a) {
0180                 //Check an item
0181                 uint pick = rand() % itemId;
0182                 if (realItemsById.contains(pick)) {
0183                     uint index = repository.findIndex(*realItemsById[pick]);
0184                     QVERIFY(index);
0185                     QVERIFY(realItemsByIndex.contains(index));
0186                     QVERIFY(realItemsByIndex[index]->equals(repository.itemFromIndex(index)));
0187 
0188                     if (( uint ) (rand() % 100) < deletionProbability) {
0189                         ++totalDeletions;
0190                         //Delete the item
0191                         repository.deleteItem(index);
0192                         QVERIFY(!repository.findIndex(*realItemsById[pick]));
0193                         uint newIndex = repository.index(*realItemsById[pick]);
0194                         QVERIFY(newIndex);
0195                         QVERIFY(realItemsByIndex[index]->equals(repository.itemFromIndex(newIndex)));
0196 
0197 #ifdef POSITION_TEST
0198                         //Since we have previously deleted the item, there must be enough space
0199                         if (!((newIndex >> 16) <= (highestSeenIndex >> 16))) {
0200                             qDebug() << "size:" << realItemsById[pick]->itemSize();
0201                             qDebug() << "previous highest seen bucket:" << (highestSeenIndex >> 16);
0202                             qDebug() << "new bucket:" << (newIndex >> 16);
0203                         }
0204                         QVERIFY((newIndex >> 16) <= (highestSeenIndex >> 16));
0205 #endif
0206                         repository.deleteItem(newIndex);
0207                         QVERIFY(!repository.findIndex(*realItemsById[pick]));
0208                         delete[] realItemsById[pick];
0209                         realItemsById.remove(pick);
0210                         realItemsByIndex.remove(index);
0211                     }
0212                 }
0213             }
0214 
0215             // calling statistics here does some extended tests and shouldn't crash or assert
0216             repository.statistics();
0217         }
0218 
0219         // cleanup
0220         {
0221             for (auto it = realItemsByIndex.constBegin(); it != realItemsByIndex.constEnd(); ++it) {
0222                 repository.deleteItem(it.key());
0223                 delete[] it.value();
0224             }
0225 
0226             realItemsById.clear();
0227             realItemsByIndex.clear();
0228         }
0229 
0230         qDebug() << "total insertions:" << totalInsertions << "total deletions:" << totalDeletions <<
0231         "average item size:" << (totalSize / totalInsertions) << "biggest item size:" << maxSize;
0232 
0233         const auto stats = repository.statistics();
0234         qDebug() << stats;
0235         QVERIFY(stats.freeUnreachableSpace < stats.freeSpaceInBuckets / 100); // < 1% of the free space is unreachable
0236         QVERIFY(stats.freeSpaceInBuckets < stats.usedSpaceForBuckets); // < 20% free space
0237     }
0238     void testLeaks()
0239     {
0240         srand(12345);
0241         QMutex mutex;
0242         ItemRepository<TestItem, TestItemRequest> repository(QStringLiteral("TestItemRepository"), &mutex);
0243         QList<TestItem*> items;
0244         for (int i = 0; i < 10000; ++i) {
0245             TestItem* item = createItem(i, (rand() % 1000) + sizeof(TestItem));
0246             items << item;
0247             repository.index(TestItemRequest(*item));
0248         }
0249 
0250         for (auto item : qAsConst(items)) {
0251             delete[] item;
0252         }
0253     }
0254     void testStringSharing()
0255     {
0256         QString qString;
0257         qString.fill('.', 1000);
0258         IndexedString indexedString(qString);
0259         const int repeat = 10000;
0260         QVector<QString> strings;
0261         strings.resize(repeat);
0262         for (int i = 0; i < repeat; ++i) {
0263             strings[i] = indexedString.str();
0264             QCOMPARE(qString, strings[i]);
0265         }
0266     }
0267     void deleteClashingMonsterBucket_data()
0268     {
0269         QTest::addColumn<QVector<uint>>("itemSizes");
0270         int testIndex = 0;
0271 
0272         auto addRow = [&testIndex](QVector<uint> itemSizes) {
0273             QTest::addRow("%d", testIndex++) << itemSizes;
0274         };
0275 
0276         const auto big = ItemRepositoryBucketSize + 10;
0277         const auto small = 20;
0278         addRow({small, big});
0279         addRow({small, small + 1});
0280         addRow({big, big + 1});
0281         addRow({small, small + 1, small + 2});
0282         addRow({small, small + 1, big});
0283         addRow({small, big, big + 1});
0284         addRow({big, big + 1, big + 2});
0285     }
0286     void deleteClashingMonsterBucket()
0287     {
0288         QFETCH(QVector<uint>, itemSizes);
0289 
0290         std::vector<std::size_t> originalItemIds(itemSizes.size());
0291         std::iota(originalItemIds.begin(), originalItemIds.end(), 0);
0292 
0293         QMutex mutex;
0294         ItemRepository<TestItem, TestItemRequest> repository(QStringLiteral("TestItemRepository"), &mutex);
0295 
0296         // repeat the below for multiple hashes
0297         for (auto hash : {1235u, 1236u}) {
0298             // one permutation for the insertion order
0299             auto itemIdsToCreate = originalItemIds;
0300             do {
0301                 // one permutation for the deletion order
0302                 auto itemIdsToDelete = originalItemIds;
0303                 do {
0304                     std::vector<uint> repoIndices;
0305                     repoIndices.reserve(itemSizes.size());
0306                     std::vector<std::unique_ptr<TestItem[]>> items;
0307                     items.reserve(itemSizes.size());
0308 
0309                     // create items
0310                     for (auto itemId : itemIdsToCreate) {
0311                         std::unique_ptr<TestItem[]> item(createItem(hash, itemSizes[itemId]));
0312                         const auto itemRequest = TestItemRequest(*item.get(), true);
0313 
0314                         QVERIFY(!repository.findIndex(itemRequest));
0315 
0316                         for (const auto& otherItem : items)
0317                             QVERIFY(!otherItem.get()->equals(item.get()));
0318 
0319                         auto index = repository.index(itemRequest);
0320                         QVERIFY(index);
0321                         QCOMPARE(repository.findIndex(itemRequest), index);
0322 
0323                         items.push_back(std::move(item));
0324                         repoIndices.push_back(index);
0325                     }
0326 
0327                     // delete items
0328                     for (auto itemId : itemIdsToDelete) {
0329                         const auto& item = items[itemId];
0330                         const auto itemRequest = TestItemRequest(*item.get(), true);
0331                         const auto index = repoIndices[itemId];
0332 
0333                         QCOMPARE(repository.findIndex(itemRequest), index);
0334                         repository.deleteItem(index);
0335                         QCOMPARE(repository.findIndex(itemRequest), 0);
0336                     }
0337                 } while (std::next_permutation(itemIdsToDelete.begin(), itemIdsToDelete.end()));
0338             } while (std::next_permutation(itemIdsToCreate.begin(), itemIdsToCreate.end()));
0339         }
0340     }
0341     void usePermissiveModuloWhenRemovingClashLinks()
0342     {
0343         QMutex mutex;
0344         ItemRepository<TestItem, TestItemRequest> repository(QStringLiteral("PermissiveModulo"), &mutex);
0345 
0346         const uint bucketHashSize = decltype(repository)::bucketHashSize;
0347         const uint nextBucketHashSize = decltype(repository)::MyBucket::NextBucketHashSize;
0348         auto bucketNumberForIndex = [](const uint index) {
0349                                         return index >> 16;
0350                                     };
0351 
0352         const uint clashValue = 2;
0353 
0354         // Choose sizes that ensure that the items fit in the desired buckets
0355         const uint bigItemSize = ItemRepositoryBucketSize * 0.55 - 1;
0356         const uint smallItemSize = ItemRepositoryBucketSize * 0.25 - 1;
0357 
0358         // Will get placed in bucket 1 (bucket zero is invalid), so the root bucket table at position 'clashValue' will be '1'
0359         const QScopedArrayPointer<TestItem> firstChainFirstLink(createItem(clashValue, bigItemSize));
0360         const uint firstChainFirstLinkIndex = repository.index(*firstChainFirstLink);
0361         QCOMPARE(bucketNumberForIndex(firstChainFirstLinkIndex), 1u);
0362 
0363         // Will also get placed in bucket 1, so root bucket table at position 'nextBucketHashSize + clashValue' will be '1'
0364         const QScopedArrayPointer<TestItem> secondChainFirstLink(createItem(nextBucketHashSize + clashValue,
0365                                                                             smallItemSize));
0366         const uint secondChainFirstLinkIndex = repository.index(*secondChainFirstLink);
0367         QCOMPARE(bucketNumberForIndex(secondChainFirstLinkIndex), 1u);
0368 
0369         // Will get placed in bucket 2, so bucket 1's next hash table at position 'clashValue' will be '2'
0370         const QScopedArrayPointer<TestItem> firstChainSecondLink(createItem(bucketHashSize + clashValue, bigItemSize));
0371         const uint firstChainSecondLinkIndex = repository.index(*firstChainSecondLink);
0372         QCOMPARE(bucketNumberForIndex(firstChainSecondLinkIndex), 2u);
0373 
0374         // Will also get placed in bucket 2, reachable since bucket 1's next hash table at position 'clashValue' is '2'
0375         const QScopedArrayPointer<TestItem> secondChainSecondLink(createItem(
0376                 bucketHashSize + nextBucketHashSize + clashValue, smallItemSize));
0377         const uint secondChainSecondLinkIndex = repository.index(*secondChainSecondLink);
0378         QCOMPARE(bucketNumberForIndex(secondChainSecondLinkIndex), 2u);
0379 
0380         /*
0381          * At this point we have two chains in the repository, rooted at 'clashValue' and 'nextBucketHashSize + clashValue'
0382          * Both of the chains start in bucket 1 and end in bucket 2, but both chains share the same link to bucket 2
0383          * This is because two of the hashes clash the other two when % bucketHashSize, but all of them clash % nextBucketHashSize
0384          */
0385 
0386         repository.deleteItem(firstChainSecondLinkIndex);
0387 
0388         /*
0389          * Now we've deleted the second item in the first chain, this means the first chain no longer requires a link to the
0390          * second bucket where that item was... but the link must remain, since it's shared (clashed) by the second chain.
0391          *
0392          * When cutting a link out of the middle of the chain, we need to check if its items clash using the "permissive"
0393          * modulus (the size of the /next/ buckets map), which is always a factor of the "stricter" modulus (the size of the
0394          * /root/ buckets map).
0395          *
0396          * This behavior implies that there will sometimes be useless buckets in the bucket chain for a given hash, so when
0397          * cutting out the root link, it's safe to skip over them to the first clash with the 'stricter' modulus.
0398          */
0399 
0400         // The second item of the second chain must still be reachable
0401         QCOMPARE(repository.findIndex(*secondChainSecondLink), secondChainSecondLinkIndex);
0402 
0403         /*
0404          * As a memo to anyone who's still reading this, this also means the following situation can exist:
0405          *
0406          * bucketHashSize == 8
0407          * nextBucketHashSize == 4
0408          * U is a link table
0409          * B is a bucket
0410          * [...] are the hashes of the contained items
0411          *
0412          * U
0413          * U
0414          * U -> B1
0415          * U
0416          * U
0417          * U
0418          * U -> B2
0419          * U
0420          *
0421          * B0 (Invalid)
0422          * B1 -> [2, 6]
0423          *   U
0424          *   U
0425          *   U -> B3
0426          *   U
0427          * B2 -> [14]
0428          *   U
0429          *   U
0430          *   U -> B1
0431          *   U
0432          * B3 -> [10]
0433          *   U
0434          *   U
0435          *   U
0436          *   U
0437          *
0438          * The chain for hash 6 is:
0439          * Root[6] -> B2[2] -> B1[2] -> B3
0440          *
0441          * If you remove the item with hash 6, 6 and 2 will clash with mod 4 (permissive)
0442          *
0443          * So the useless link `B2[2] -> B1` will be preserved, even though its useless
0444          * as the item with hash 2 is reachable directly from the root.
0445          *
0446          * So TODO: Don't preserve links to items accessible from root buckets. This cannot
0447          * be done correctly using only Bucket::hasClashingItem as of now.
0448          */
0449     }
0450 };
0451 
0452 #include "test_itemrepository.moc"
0453 
0454 QTEST_MAIN(TestItemRepository)