File indexing completed on 2024-05-12 04:38:08
0001 /* 0002 SPDX-FileCopyrightText: 2007 David Nolden <david.nolden.kdevelop@art-master.de> 0003 0004 SPDX-License-Identifier: GPL-2.0-or-later 0005 */ 0006 0007 #ifndef KDEVPLATFORM_BASICSETREPOSITORY_H 0008 #define KDEVPLATFORM_BASICSETREPOSITORY_H 0009 0010 #include <set> 0011 #include <vector> 0012 #include <language/languageexport.h> 0013 #include <language/util/kdevhash.h> 0014 #include <serialization/itemrepository.h> 0015 0016 /** 0017 * This file provides a set system that can be used to efficiently manage sub-sets of a set of global objects. 0018 * Each global object must be mapped to a natural number. 0019 * 0020 * The efficiency comes from: 0021 * 1. The sets are represented by binary trees, where every single tree node can be shared across multiple sets. 0022 * For that reason, intersecting sets will share large parts of their internal data structures, which leads to 0023 * extremely low memory-usage compared to using for example std::set. 0024 * 0025 * The more common intersections between the sets exist, the more efficient the system is. 0026 * This will have the biggest advantage when items that were added contiguously are commonly 0027 * used within the same sets, and work fastest if the intersections between different sets are contiguously long. 0028 * 0029 * That makes it perfect for representing sets that are inherited across tree-like structures, like for example in C++: 0030 * - Macros defined in files(Macros are inherited from included files) 0031 * - Strings contained in files(Strings are inherited from included files) 0032 * - Set of all included files 0033 * 0034 * Measurements(see in kdevelop languages/cpp/cppduchain/tests/duchaintest) show that even in worst case(with totally random sets) 0035 * these set-repositories are 2 times faster than std::set, and 4 times faster than QSet. 0036 * 0037 * The main disadvantages are that a global repository needs to be managed, and needs to be secured from simultaneous write-access 0038 * during multi-threading. This is done internally if the doLocking flag is set while constructing. 0039 * */ 0040 0041 class QString; 0042 0043 namespace Utils { 0044 enum { 0045 delayedDeletionByDefault = 0 0046 }; 0047 0048 class SetNode; 0049 class BasicSetRepository; 0050 class SetNodeDataRequest; 0051 0052 ///Internal node representation, exported here for performance reason. 0053 struct KDEVPLATFORMLANGUAGE_EXPORT SetNodeData 0054 { 0055 private: 0056 //Rule: start < end 0057 uint m_start, m_end; //This set-node bounds all indices starting at start until end, not including end. 0058 0059 //Child nodes 0060 //Rule: left->start == start, right->end == end 0061 //Rule: (left != 0 && right != 0) || (left == 0 && right == 0) 0062 uint m_leftNode, m_rightNode; 0063 0064 // cached hash of this node data - it only includes the first four data members, 0065 // i.e. m_start, m_end, m_leftNode and m_rightNode 0066 uint m_hash; 0067 0068 public: 0069 uint m_refCount = 0; 0070 0071 inline explicit SetNodeData(uint start = 1, uint end = 1, uint leftNode = 0, uint rightNode = 0) 0072 : m_start(start) 0073 , m_end(end) 0074 , m_leftNode(leftNode) 0075 , m_rightNode(rightNode) 0076 , m_hash(calculateHash()) 0077 { 0078 } 0079 0080 inline uint hash() const 0081 { 0082 return m_hash; 0083 } 0084 0085 uint calculateHash() const 0086 { 0087 return KDevHash() << m_start << m_end << m_leftNode << m_rightNode; 0088 } 0089 0090 inline short unsigned int itemSize() const 0091 { 0092 return sizeof(SetNodeData); 0093 } 0094 0095 inline bool contiguous() const 0096 { 0097 return !m_leftNode; 0098 } 0099 0100 inline bool hasSlaves() const 0101 { 0102 return ( bool )m_leftNode; 0103 } 0104 0105 inline uint start() const 0106 { 0107 return m_start; 0108 } 0109 inline uint end() const 0110 { 0111 return m_end; 0112 } 0113 inline uint leftNode() const 0114 { 0115 return m_leftNode; 0116 } 0117 inline uint rightNode() const 0118 { 0119 return m_rightNode; 0120 } 0121 }; 0122 0123 using SetDataRepositoryBase 0124 = KDevelop::ItemRepository<SetNodeData, SetNodeDataRequest, false, QRecursiveMutex, sizeof(SetNodeData)>; 0125 0126 struct SetDataRepository; 0127 0128 class SetNodeDataRequest 0129 { 0130 public: 0131 0132 enum { 0133 AverageSize = sizeof(SetNodeData) 0134 }; 0135 0136 //This constructor creates a request that finds or creates a node that equals the given node 0137 //The m_hash must be up to date, and the node must be split correctly around its splitPosition 0138 inline SetNodeDataRequest(const SetNodeData* _data, SetDataRepository& _repository, 0139 BasicSetRepository* _setRepository); 0140 0141 ~SetNodeDataRequest(); 0142 0143 using HashType = unsigned int; 0144 0145 //Should return the m_hash-value associated with this request(For example the m_hash of a string) 0146 inline HashType hash() const 0147 { 0148 return m_hash; 0149 } 0150 0151 //Should return the size of an item created with createItem 0152 inline uint itemSize() const 0153 { 0154 return sizeof(SetNodeData); 0155 } 0156 //Should create an item where the information of the requested item is permanently stored. The pointer 0157 //@param item equals an allocated range with the size of itemSize(). 0158 void createItem(SetNodeData* item) const; 0159 0160 static void destroy(SetNodeData* data, KDevelop::AbstractItemRepository& _repository); 0161 0162 static bool persistent(const SetNodeData* item) 0163 { 0164 return ( bool )item->m_refCount; 0165 } 0166 0167 //Should return whether the here requested item equals the given item 0168 bool equals(const SetNodeData* item) const; 0169 0170 SetNodeData data; 0171 0172 uint m_hash; 0173 SetDataRepository& repository; 0174 BasicSetRepository* setRepository; //May be zero when no notifications are wanted 0175 mutable bool m_created; 0176 }; 0177 0178 struct KDEVPLATFORMLANGUAGE_EXPORT SetDataRepository 0179 : public SetDataRepositoryBase 0180 { 0181 SetDataRepository(BasicSetRepository* _setRepository, const QString& name, 0182 KDevelop::ItemRepositoryRegistry* registry, QRecursiveMutex* mutex) 0183 : SetDataRepositoryBase(name, mutex, registry) 0184 , setRepository(_setRepository) 0185 { 0186 } 0187 0188 BasicSetRepository* setRepository; 0189 }; 0190 0191 /** 0192 * This object is copyable. It represents a set, and allows iterating through the represented indices. 0193 * */ 0194 class KDEVPLATFORMLANGUAGE_EXPORT Set 0195 { 0196 public: 0197 class Iterator; 0198 class IteratorPrivate; 0199 using Index = unsigned int; 0200 0201 Set(); 0202 //Internal constructor 0203 Set(uint treeNode, BasicSetRepository* repository); 0204 ~Set(); 0205 Set(const Set& rhs); 0206 Set& operator=(const Set& rhs); 0207 0208 QString dumpDotGraph() const; 0209 0210 //Returns an iterator that can be used to iterate over the contained indices 0211 Iterator iterator() const; 0212 0213 //Returns this set converted to a standard set that contains all indices contained by this set. 0214 std::set<unsigned int> stdSet() const; 0215 0216 ///Returns the count of items in the set 0217 unsigned int count() const; 0218 0219 bool contains(Index index) const; 0220 0221 ///@warning: The following operations can change the global repository, and thus need to be serialized 0222 /// using mutexes in case of multi-threading. 0223 0224 ///Set union 0225 Set operator +(const Set& rhs) const; 0226 Set& operator +=(const Set& rhs); 0227 0228 ///Set intersection 0229 Set operator &(const Set& rhs) const; 0230 Set& operator &=(const Set& rhs); 0231 0232 ///Set subtraction 0233 Set operator -(const Set& rhs) const; 0234 Set& operator -=(const Set& rhs); 0235 0236 uint setIndex() const 0237 { 0238 return m_tree; 0239 } 0240 0241 ///Increase the static reference-count of this set by one. The initial reference-count of newly created sets is zero. 0242 void staticRef(); 0243 0244 ///Decrease the static reference-count of this set by one. This set must have a reference-count >= 1. 0245 ///If this set reaches the reference-count zero, it will be deleted, and all sub-nodes that also reach the reference-count zero 0246 ///will be deleted as well. @warning Either protect ALL your sets by using reference-counting, or don't use it at all. 0247 void staticUnref(); 0248 0249 ///Returns a pointer to the repository this set belongs to. Returns zero when this set is not initialized yet. 0250 BasicSetRepository* repository() const; 0251 0252 private: 0253 void unrefNode(uint); 0254 friend class BasicSetRepository; 0255 0256 uint m_tree = 0; 0257 BasicSetRepository* m_repository = nullptr; 0258 }; 0259 0260 /** 0261 * This is a repository that can be used to efficiently manage generic sets 0262 * that are represented by interweaved binary trees. 0263 * 0264 * All strings are based on items that are contained in one master-repository, 0265 * starting at one. 0266 * 0267 * An index of zero is interpreted as invalid. 0268 * */ 0269 0270 class KDEVPLATFORMLANGUAGE_EXPORT BasicSetRepository 0271 { 0272 public: 0273 ///@param name The name must be unique, and is used for loading and storing the data 0274 ///@param registry Where the repository should be registered. If you give zero, it won't be registered, and thus won't be saved to disk. 0275 explicit BasicSetRepository(const QString& name, QRecursiveMutex* mutex, 0276 KDevelop::ItemRepositoryRegistry* registry = &KDevelop::globalItemRepositoryRegistry(), 0277 bool delayedDeletion = delayedDeletionByDefault); 0278 virtual ~BasicSetRepository(); 0279 using Index = unsigned int; 0280 0281 /** 0282 * Takes a sorted list indices, returns a set representing them 0283 * */ 0284 Set createSetFromIndices(const std::vector<Index>& indices); 0285 0286 /** 0287 * Takes a simple set of indices 0288 * */ 0289 Set createSet(const std::set<Index>& indices); 0290 0291 /** 0292 * Creates a set that only contains that single index. 0293 * For better performance, you should create bigger sets than this. 0294 * */ 0295 Set createSet(Index i); 0296 0297 void printStatistics() const; 0298 KDevelop::ItemRepositoryStatistics statistics() const 0299 { 0300 return m_dataRepository.statistics(); 0301 } 0302 0303 ///Is called when this index is not part of any set any more 0304 virtual void itemRemovedFromSets(uint index); 0305 0306 ///Is called when this index is added to one of the contained sets for the first time 0307 virtual void itemAddedToSets(uint index); 0308 0309 inline const SetNodeData* nodeFromIndex(uint index) const 0310 { 0311 if (index) 0312 return m_dataRepository.itemFromIndex(index); 0313 else 0314 return nullptr; 0315 } 0316 0317 ///Set whether set-nodes with reference-count zero should be deleted only after a delay 0318 ///The default is true. 0319 ///This may be faster when the structure is large anyway and many temporary sets 0320 ///are created, but leads to a sparse structure in memory, which is bad for cache. 0321 void setDelayedDeletion(bool delayed) 0322 { 0323 m_delayedDeletion = delayed; 0324 } 0325 0326 inline bool delayedDeletion() const 0327 { 0328 return m_delayedDeletion; 0329 } 0330 0331 private: 0332 friend class Set; 0333 friend class Set::Iterator; 0334 SetDataRepository m_dataRepository; 0335 QRecursiveMutex* m_mutex; 0336 bool m_delayedDeletion; 0337 0338 // SetNode 0339 }; 0340 0341 /** 0342 * Use this to iterate over the indices contained in a set 0343 * */ 0344 class KDEVPLATFORMLANGUAGE_EXPORT Set::Iterator 0345 { 0346 public: 0347 Iterator(); 0348 Iterator(const Iterator& rhs); 0349 Iterator& operator=(const Iterator& rhs); 0350 0351 ~Iterator(); 0352 operator bool() const; 0353 Iterator& operator++(); 0354 BasicSetRepository::Index operator*() const; 0355 0356 private: 0357 friend class Set; 0358 friend class IteratorPrivate; 0359 static inline SetDataRepository& getDataRepository(BasicSetRepository* repo) { return repo->m_dataRepository; } 0360 const QScopedPointer<class IteratorPrivate> d_ptr; 0361 Q_DECLARE_PRIVATE(Iterator) 0362 }; 0363 } 0364 0365 #endif