File indexing completed on 2024-05-12 04:38:08

0001 /*
0002     SPDX-FileCopyrightText: 2007 David Nolden <david.nolden.kdevelop@art-master.de>
0003 
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 */
0006 
0007 #ifndef KDEVPLATFORM_BASICSETREPOSITORY_H
0008 #define KDEVPLATFORM_BASICSETREPOSITORY_H
0009 
0010 #include <set>
0011 #include <vector>
0012 #include <language/languageexport.h>
0013 #include <language/util/kdevhash.h>
0014 #include <serialization/itemrepository.h>
0015 
0016 /**
0017  * This file provides a set system that can be used to efficiently manage sub-sets of a set of global objects.
0018  * Each global object must be mapped to a natural number.
0019  *
0020  * The efficiency comes from:
0021  * 1. The sets are represented by binary trees, where every single tree node can be shared across multiple sets.
0022  *    For that reason, intersecting sets will share large parts of their internal data structures, which leads to
0023  *    extremely low memory-usage compared to using for example std::set.
0024  *
0025  * The more common intersections between the sets exist, the more efficient the system is.
0026  * This will have the biggest advantage when items that were added contiguously are commonly
0027  * used within the same sets, and work fastest if the intersections between different sets are contiguously long.
0028  *
0029  * That makes it perfect for representing sets that are inherited across tree-like structures, like for example in C++:
0030  * - Macros defined in files(Macros are inherited from included files)
0031  * - Strings contained in files(Strings are inherited from included files)
0032  * - Set of all included files
0033  *
0034  * Measurements(see in kdevelop languages/cpp/cppduchain/tests/duchaintest) show that even in worst case(with totally random sets)
0035  * these set-repositories are 2 times faster than std::set, and 4 times faster than QSet.
0036  *
0037  * The main disadvantages are that a global repository needs to be managed, and needs to be secured from simultaneous write-access
0038  * during multi-threading. This is done internally if the doLocking flag is set while constructing.
0039  * */
0040 
0041 class QString;
0042 
0043 namespace Utils {
0044 enum {
0045     delayedDeletionByDefault = 0
0046 };
0047 
0048 class SetNode;
0049 class BasicSetRepository;
0050 class SetNodeDataRequest;
0051 
0052 ///Internal node representation, exported here for performance reason.
0053 struct KDEVPLATFORMLANGUAGE_EXPORT SetNodeData
0054 {
0055 private:
0056     //Rule: start < end
0057     uint m_start, m_end; //This set-node bounds all indices starting at start until end, not including end.
0058 
0059     //Child nodes
0060     //Rule: left->start == start, right->end == end
0061     //Rule: (left != 0 && right != 0) || (left == 0 && right == 0)
0062     uint m_leftNode, m_rightNode;
0063 
0064     // cached hash of this node data - it only includes the first four data members,
0065     // i.e. m_start, m_end, m_leftNode and m_rightNode
0066     uint m_hash;
0067 
0068 public:
0069     uint m_refCount = 0;
0070 
0071     inline explicit SetNodeData(uint start = 1, uint end = 1, uint leftNode = 0, uint rightNode = 0)
0072         : m_start(start)
0073         , m_end(end)
0074         , m_leftNode(leftNode)
0075         , m_rightNode(rightNode)
0076         , m_hash(calculateHash())
0077     {
0078     }
0079 
0080     inline uint hash() const
0081     {
0082         return m_hash;
0083     }
0084 
0085     uint calculateHash() const
0086     {
0087         return KDevHash() << m_start << m_end << m_leftNode << m_rightNode;
0088     }
0089 
0090     inline short unsigned int itemSize() const
0091     {
0092         return sizeof(SetNodeData);
0093     }
0094 
0095     inline bool contiguous() const
0096     {
0097         return !m_leftNode;
0098     }
0099 
0100     inline bool hasSlaves() const
0101     {
0102         return ( bool )m_leftNode;
0103     }
0104 
0105     inline uint start() const
0106     {
0107         return m_start;
0108     }
0109     inline uint end() const
0110     {
0111         return m_end;
0112     }
0113     inline uint leftNode() const
0114     {
0115         return m_leftNode;
0116     }
0117     inline uint rightNode() const
0118     {
0119         return m_rightNode;
0120     }
0121 };
0122 
0123 using SetDataRepositoryBase
0124     = KDevelop::ItemRepository<SetNodeData, SetNodeDataRequest, false, QRecursiveMutex, sizeof(SetNodeData)>;
0125 
0126 struct SetDataRepository;
0127 
0128 class SetNodeDataRequest
0129 {
0130 public:
0131 
0132     enum {
0133         AverageSize = sizeof(SetNodeData)
0134     };
0135 
0136     //This constructor creates a request that finds or creates a node that equals the given node
0137     //The m_hash must be up to date, and the node must be split correctly around its splitPosition
0138     inline SetNodeDataRequest(const SetNodeData* _data, SetDataRepository& _repository,
0139                               BasicSetRepository* _setRepository);
0140 
0141     ~SetNodeDataRequest();
0142 
0143     using HashType = unsigned int;
0144 
0145     //Should return the m_hash-value associated with this request(For example the m_hash of a string)
0146     inline HashType hash() const
0147     {
0148         return m_hash;
0149     }
0150 
0151     //Should return the size of an item created with createItem
0152     inline uint itemSize() const
0153     {
0154         return sizeof(SetNodeData);
0155     }
0156     //Should create an item where the information of the requested item is permanently stored. The pointer
0157     //@param item equals an allocated range with the size of itemSize().
0158     void createItem(SetNodeData* item) const;
0159 
0160     static void destroy(SetNodeData* data, KDevelop::AbstractItemRepository& _repository);
0161 
0162     static bool persistent(const SetNodeData* item)
0163     {
0164         return ( bool )item->m_refCount;
0165     }
0166 
0167     //Should return whether the here requested item equals the given item
0168     bool equals(const SetNodeData* item) const;
0169 
0170     SetNodeData data;
0171 
0172     uint m_hash;
0173     SetDataRepository& repository;
0174     BasicSetRepository* setRepository; //May be zero when no notifications are wanted
0175     mutable bool m_created;
0176 };
0177 
0178 struct KDEVPLATFORMLANGUAGE_EXPORT SetDataRepository
0179     : public SetDataRepositoryBase
0180 {
0181     SetDataRepository(BasicSetRepository* _setRepository, const QString& name,
0182                       KDevelop::ItemRepositoryRegistry* registry, QRecursiveMutex* mutex)
0183         : SetDataRepositoryBase(name, mutex, registry)
0184         , setRepository(_setRepository)
0185     {
0186     }
0187 
0188     BasicSetRepository* setRepository;
0189 };
0190 
0191 /**
0192  * This object is copyable. It represents a set, and allows iterating through the represented indices.
0193  * */
0194 class KDEVPLATFORMLANGUAGE_EXPORT Set
0195 {
0196 public:
0197     class Iterator;
0198     class IteratorPrivate;
0199     using Index = unsigned int;
0200 
0201     Set();
0202     //Internal constructor
0203     Set(uint treeNode, BasicSetRepository* repository);
0204     ~Set();
0205     Set(const Set& rhs);
0206     Set& operator=(const Set& rhs);
0207 
0208     QString dumpDotGraph() const;
0209 
0210     //Returns an iterator that can be used to iterate over the contained indices
0211     Iterator iterator() const;
0212 
0213     //Returns this set converted to a standard set that contains all indices contained by this set.
0214     std::set<unsigned int> stdSet() const;
0215 
0216     ///Returns the count of items in the set
0217     unsigned int count() const;
0218 
0219     bool contains(Index index) const;
0220 
0221     ///@warning: The following operations can change the global repository, and thus need to be serialized
0222     ///          using mutexes in case of multi-threading.
0223 
0224     ///Set union
0225     Set operator +(const Set& rhs) const;
0226     Set& operator +=(const Set& rhs);
0227 
0228     ///Set intersection
0229     Set operator &(const Set& rhs) const;
0230     Set& operator &=(const Set& rhs);
0231 
0232     ///Set subtraction
0233     Set operator -(const Set& rhs) const;
0234     Set& operator -=(const Set& rhs);
0235 
0236     uint setIndex() const
0237     {
0238         return m_tree;
0239     }
0240 
0241     ///Increase the static reference-count of this set by one. The initial reference-count of newly created sets is zero.
0242     void staticRef();
0243 
0244     ///Decrease the static reference-count of this set by one. This set must have a reference-count >= 1.
0245     ///If this set reaches the reference-count zero, it will be deleted, and all sub-nodes that also reach the reference-count zero
0246     ///will be deleted as well. @warning Either protect ALL your sets by using reference-counting, or don't use it at all.
0247     void staticUnref();
0248 
0249     ///Returns a pointer to the repository this set belongs to. Returns zero when this set is not initialized yet.
0250     BasicSetRepository* repository() const;
0251 
0252 private:
0253     void unrefNode(uint);
0254     friend class BasicSetRepository;
0255 
0256     uint m_tree = 0;
0257     BasicSetRepository* m_repository = nullptr;
0258 };
0259 
0260 /**
0261  * This is a repository that can be used to efficiently manage generic sets
0262  * that are represented by interweaved binary trees.
0263  *
0264  * All strings are based on items that are contained in one master-repository,
0265  * starting at one.
0266  *
0267  * An index of zero is interpreted as invalid.
0268  * */
0269 
0270 class KDEVPLATFORMLANGUAGE_EXPORT BasicSetRepository
0271 {
0272 public:
0273     ///@param name The name must be unique, and is used for loading and storing the data
0274     ///@param registry Where the repository should be registered. If you give zero, it won't be registered, and thus won't be saved to disk.
0275     explicit BasicSetRepository(const QString& name, QRecursiveMutex* mutex,
0276                                 KDevelop::ItemRepositoryRegistry* registry = &KDevelop::globalItemRepositoryRegistry(),
0277                                 bool delayedDeletion = delayedDeletionByDefault);
0278     virtual ~BasicSetRepository();
0279     using Index = unsigned int;
0280 
0281     /**
0282      * Takes a sorted list indices, returns a set representing them
0283      * */
0284     Set createSetFromIndices(const std::vector<Index>& indices);
0285 
0286     /**
0287      * Takes a simple set of indices
0288      * */
0289     Set createSet(const std::set<Index>& indices);
0290 
0291     /**
0292      * Creates a set that only contains that single index.
0293      * For better performance, you should create bigger sets than this.
0294      * */
0295     Set createSet(Index i);
0296 
0297     void printStatistics() const;
0298     KDevelop::ItemRepositoryStatistics statistics() const
0299     {
0300         return m_dataRepository.statistics();
0301     }
0302 
0303     ///Is called when this index is not part of any set any more
0304     virtual void itemRemovedFromSets(uint index);
0305 
0306     ///Is called when this index is added to one of the contained sets for the first time
0307     virtual void itemAddedToSets(uint index);
0308 
0309     inline const SetNodeData* nodeFromIndex(uint index) const
0310     {
0311         if (index)
0312             return m_dataRepository.itemFromIndex(index);
0313         else
0314             return nullptr;
0315     }
0316 
0317     ///Set whether set-nodes with reference-count zero should be deleted only after a delay
0318     ///The default is true.
0319     ///This may be faster when the structure is large anyway and many temporary sets
0320     ///are created, but leads to a sparse structure in memory, which is bad for cache.
0321     void setDelayedDeletion(bool delayed)
0322     {
0323         m_delayedDeletion = delayed;
0324     }
0325 
0326     inline bool delayedDeletion() const
0327     {
0328         return m_delayedDeletion;
0329     }
0330 
0331 private:
0332     friend class Set;
0333     friend class Set::Iterator;
0334     SetDataRepository m_dataRepository;
0335     QRecursiveMutex* m_mutex;
0336     bool m_delayedDeletion;
0337 
0338 //   SetNode
0339 };
0340 
0341 /**
0342  * Use this to iterate over the indices contained in a set
0343  * */
0344 class KDEVPLATFORMLANGUAGE_EXPORT Set::Iterator
0345 {
0346 public:
0347     Iterator();
0348     Iterator(const Iterator& rhs);
0349     Iterator& operator=(const Iterator& rhs);
0350 
0351     ~Iterator();
0352     operator bool() const;
0353     Iterator& operator++();
0354     BasicSetRepository::Index operator*() const;
0355 
0356 private:
0357     friend class Set;
0358     friend class IteratorPrivate;
0359     static inline SetDataRepository& getDataRepository(BasicSetRepository* repo) { return repo->m_dataRepository; }
0360     const QScopedPointer<class IteratorPrivate> d_ptr;
0361     Q_DECLARE_PRIVATE(Iterator)
0362 };
0363 }
0364 
0365 #endif