File indexing completed on 2024-11-24 05:07:24
0001 /** 0002 * MIT License 0003 * 0004 * Copyright (c) 2017 Thibaut Goetghebuer-Planchon <tessil@gmx.com> 0005 * 0006 * Permission is hereby granted, free of charge, to any person obtaining a copy 0007 * of this software and associated documentation files (the "Software"), to deal 0008 * in the Software without restriction, including without limitation the rights 0009 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 0010 * copies of the Software, and to permit persons to whom the Software is 0011 * furnished to do so, subject to the following conditions: 0012 * 0013 * The above copyright notice and this permission notice shall be included in 0014 * all copies or substantial portions of the Software. 0015 * 0016 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 0017 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 0018 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 0019 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 0020 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 0021 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 0022 * SOFTWARE. 0023 */ 0024 #ifndef TSL_ROBIN_HASH_H 0025 #define TSL_ROBIN_HASH_H 0026 0027 #include <algorithm> 0028 #include <cassert> 0029 #include <cmath> 0030 #include <cstddef> 0031 #include <cstdint> 0032 #include <exception> 0033 #include <iterator> 0034 #include <limits> 0035 #include <memory> 0036 #include <new> 0037 #include <stdexcept> 0038 #include <tuple> 0039 #include <type_traits> 0040 #include <utility> 0041 #include <vector> 0042 0043 #include "robin_growth_policy.h" 0044 0045 namespace tsl { 0046 0047 namespace detail_robin_hash { 0048 0049 template <typename T> 0050 struct make_void { 0051 using type = void; 0052 }; 0053 0054 template <typename T, typename = void> 0055 struct has_is_transparent : std::false_type {}; 0056 0057 template <typename T> 0058 struct has_is_transparent<T, 0059 typename make_void<typename T::is_transparent>::type> 0060 : std::true_type {}; 0061 0062 template <typename U> 0063 struct is_power_of_two_policy : std::false_type {}; 0064 0065 template <std::size_t GrowthFactor> 0066 struct is_power_of_two_policy<tsl::rh::power_of_two_growth_policy<GrowthFactor>> 0067 : std::true_type {}; 0068 0069 // Only available in C++17, we need to be compatible with C++11 0070 template <class T> 0071 const T& clamp(const T& v, const T& lo, const T& hi) { 0072 return std::min(hi, std::max(lo, v)); 0073 } 0074 0075 template <typename T, typename U> 0076 static T numeric_cast(U value, 0077 const char* error_message = "numeric_cast() failed.") { 0078 T ret = static_cast<T>(value); 0079 if (static_cast<U>(ret) != value) { 0080 TSL_RH_THROW_OR_TERMINATE(std::runtime_error, error_message); 0081 } 0082 0083 const bool is_same_signedness = 0084 (std::is_unsigned<T>::value && std::is_unsigned<U>::value) || 0085 (std::is_signed<T>::value && std::is_signed<U>::value); 0086 if (!is_same_signedness && (ret < T{}) != (value < U{})) { 0087 TSL_RH_THROW_OR_TERMINATE(std::runtime_error, error_message); 0088 } 0089 0090 return ret; 0091 } 0092 0093 template <class T, class Deserializer> 0094 static T deserialize_value(Deserializer& deserializer) { 0095 // MSVC < 2017 is not conformant, circumvent the problem by removing the 0096 // template keyword 0097 #if defined(_MSC_VER) && _MSC_VER < 1910 0098 return deserializer.Deserializer::operator()<T>(); 0099 #else 0100 return deserializer.Deserializer::template operator()<T>(); 0101 #endif 0102 } 0103 0104 /** 0105 * Fixed size type used to represent size_type values on serialization. Need to 0106 * be big enough to represent a std::size_t on 32 and 64 bits platforms, and 0107 * must be the same size on both platforms. 0108 */ 0109 using slz_size_type = std::uint64_t; 0110 static_assert(std::numeric_limits<slz_size_type>::max() >= 0111 std::numeric_limits<std::size_t>::max(), 0112 "slz_size_type must be >= std::size_t"); 0113 0114 using truncated_hash_type = std::uint32_t; 0115 0116 /** 0117 * Helper class that stores a truncated hash if StoreHash is true and nothing 0118 * otherwise. 0119 */ 0120 template <bool StoreHash> 0121 class bucket_entry_hash { 0122 public: 0123 bool bucket_hash_equal(std::size_t /*hash*/) const noexcept { return true; } 0124 0125 truncated_hash_type truncated_hash() const noexcept { return 0; } 0126 0127 protected: 0128 void set_hash(truncated_hash_type /*hash*/) noexcept {} 0129 }; 0130 0131 template <> 0132 class bucket_entry_hash<true> { 0133 public: 0134 bool bucket_hash_equal(std::size_t hash) const noexcept { 0135 return m_hash == truncated_hash_type(hash); 0136 } 0137 0138 truncated_hash_type truncated_hash() const noexcept { return m_hash; } 0139 0140 protected: 0141 void set_hash(truncated_hash_type hash) noexcept { 0142 m_hash = truncated_hash_type(hash); 0143 } 0144 0145 private: 0146 truncated_hash_type m_hash; 0147 }; 0148 0149 /** 0150 * Each bucket entry has: 0151 * - A value of type `ValueType`. 0152 * - An integer to store how far the value of the bucket, if any, is from its 0153 * ideal bucket (ex: if the current bucket 5 has the value 'foo' and 0154 * `hash('foo') % nb_buckets` == 3, `dist_from_ideal_bucket()` will return 2 as 0155 * the current value of the bucket is two buckets away from its ideal bucket) If 0156 * there is no value in the bucket (i.e. `empty()` is true) 0157 * `dist_from_ideal_bucket()` will be < 0. 0158 * - A marker which tells us if the bucket is the last bucket of the bucket 0159 * array (useful for the iterator of the hash table). 0160 * - If `StoreHash` is true, 32 bits of the hash of the value, if any, are also 0161 * stored in the bucket. If the size of the hash is more than 32 bits, it is 0162 * truncated. We don't store the full hash as storing the hash is a potential 0163 * opportunity to use the unused space due to the alignment of the bucket_entry 0164 * structure. We can thus potentially store the hash without any extra space 0165 * (which would not be possible with 64 bits of the hash). 0166 */ 0167 template <typename ValueType, bool StoreHash> 0168 class bucket_entry : public bucket_entry_hash<StoreHash> { 0169 using bucket_hash = bucket_entry_hash<StoreHash>; 0170 0171 public: 0172 using value_type = ValueType; 0173 using distance_type = std::int16_t; 0174 0175 bucket_entry() noexcept 0176 : bucket_hash(), 0177 m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), 0178 m_last_bucket(false) { 0179 tsl_rh_assert(empty()); 0180 } 0181 0182 bucket_entry(bool last_bucket) noexcept 0183 : bucket_hash(), 0184 m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), 0185 m_last_bucket(last_bucket) { 0186 tsl_rh_assert(empty()); 0187 } 0188 0189 bucket_entry(const bucket_entry& other) noexcept( 0190 std::is_nothrow_copy_constructible<value_type>::value) 0191 : bucket_hash(other), 0192 m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), 0193 m_last_bucket(other.m_last_bucket) { 0194 if (!other.empty()) { 0195 ::new (static_cast<void*>(std::addressof(m_value))) 0196 value_type(other.value()); 0197 m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket; 0198 } 0199 tsl_rh_assert(empty() == other.empty()); 0200 } 0201 0202 /** 0203 * Never really used, but still necessary as we must call resize on an empty 0204 * `std::vector<bucket_entry>`. and we need to support move-only types. See 0205 * robin_hash constructor for details. 0206 */ 0207 bucket_entry(bucket_entry&& other) noexcept( 0208 std::is_nothrow_move_constructible<value_type>::value) 0209 : bucket_hash(std::move(other)), 0210 m_dist_from_ideal_bucket(EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET), 0211 m_last_bucket(other.m_last_bucket) { 0212 if (!other.empty()) { 0213 ::new (static_cast<void*>(std::addressof(m_value))) 0214 value_type(std::move(other.value())); 0215 m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket; 0216 } 0217 tsl_rh_assert(empty() == other.empty()); 0218 } 0219 0220 bucket_entry& operator=(const bucket_entry& other) noexcept( 0221 std::is_nothrow_copy_constructible<value_type>::value) { 0222 if (this != &other) { 0223 clear(); 0224 0225 bucket_hash::operator=(other); 0226 if (!other.empty()) { 0227 ::new (static_cast<void*>(std::addressof(m_value))) 0228 value_type(other.value()); 0229 } 0230 0231 m_dist_from_ideal_bucket = other.m_dist_from_ideal_bucket; 0232 m_last_bucket = other.m_last_bucket; 0233 } 0234 0235 return *this; 0236 } 0237 0238 bucket_entry& operator=(bucket_entry&&) = delete; 0239 0240 ~bucket_entry() noexcept { clear(); } 0241 0242 void clear() noexcept { 0243 if (!empty()) { 0244 destroy_value(); 0245 m_dist_from_ideal_bucket = EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET; 0246 } 0247 } 0248 0249 bool empty() const noexcept { 0250 return m_dist_from_ideal_bucket == EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET; 0251 } 0252 0253 value_type& value() noexcept { 0254 tsl_rh_assert(!empty()); 0255 #if defined(__cplusplus) && __cplusplus >= 201703L 0256 return *std::launder( 0257 reinterpret_cast<value_type*>(std::addressof(m_value))); 0258 #else 0259 return *reinterpret_cast<value_type*>(std::addressof(m_value)); 0260 #endif 0261 } 0262 0263 const value_type& value() const noexcept { 0264 tsl_rh_assert(!empty()); 0265 #if defined(__cplusplus) && __cplusplus >= 201703L 0266 return *std::launder( 0267 reinterpret_cast<const value_type*>(std::addressof(m_value))); 0268 #else 0269 return *reinterpret_cast<const value_type*>(std::addressof(m_value)); 0270 #endif 0271 } 0272 0273 distance_type dist_from_ideal_bucket() const noexcept { 0274 return m_dist_from_ideal_bucket; 0275 } 0276 0277 bool last_bucket() const noexcept { return m_last_bucket; } 0278 0279 void set_as_last_bucket() noexcept { m_last_bucket = true; } 0280 0281 template <typename... Args> 0282 void set_value_of_empty_bucket(distance_type dist_from_ideal_bucket, 0283 truncated_hash_type hash, 0284 Args&&... value_type_args) { 0285 tsl_rh_assert(dist_from_ideal_bucket >= 0); 0286 tsl_rh_assert(empty()); 0287 0288 ::new (static_cast<void*>(std::addressof(m_value))) 0289 value_type(std::forward<Args>(value_type_args)...); 0290 this->set_hash(hash); 0291 m_dist_from_ideal_bucket = dist_from_ideal_bucket; 0292 0293 tsl_rh_assert(!empty()); 0294 } 0295 0296 void swap_with_value_in_bucket(distance_type& dist_from_ideal_bucket, 0297 truncated_hash_type& hash, value_type& value) { 0298 tsl_rh_assert(!empty()); 0299 tsl_rh_assert(dist_from_ideal_bucket > m_dist_from_ideal_bucket); 0300 0301 using std::swap; 0302 swap(value, this->value()); 0303 swap(dist_from_ideal_bucket, m_dist_from_ideal_bucket); 0304 0305 if (StoreHash) { 0306 const truncated_hash_type tmp_hash = this->truncated_hash(); 0307 this->set_hash(hash); 0308 hash = tmp_hash; 0309 } else { 0310 // Avoid warning of unused variable if StoreHash is false 0311 TSL_RH_UNUSED(hash); 0312 } 0313 } 0314 0315 static truncated_hash_type truncate_hash(std::size_t hash) noexcept { 0316 return truncated_hash_type(hash); 0317 } 0318 0319 private: 0320 void destroy_value() noexcept { 0321 tsl_rh_assert(!empty()); 0322 value().~value_type(); 0323 } 0324 0325 public: 0326 static const distance_type EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET = -1; 0327 static const distance_type DIST_FROM_IDEAL_BUCKET_LIMIT = 8192; 0328 static_assert(DIST_FROM_IDEAL_BUCKET_LIMIT <= 0329 std::numeric_limits<distance_type>::max() - 1, 0330 "DIST_FROM_IDEAL_BUCKET_LIMIT must be <= " 0331 "std::numeric_limits<distance_type>::max() - 1."); 0332 0333 private: 0334 distance_type m_dist_from_ideal_bucket; 0335 bool m_last_bucket; 0336 alignas(value_type) unsigned char m_value[sizeof(value_type)]; 0337 }; 0338 0339 /** 0340 * Internal common class used by `robin_map` and `robin_set`. 0341 * 0342 * ValueType is what will be stored by `robin_hash` (usually `std::pair<Key, T>` 0343 * for map and `Key` for set). 0344 * 0345 * `KeySelect` should be a `FunctionObject` which takes a `ValueType` in 0346 * parameter and returns a reference to the key. 0347 * 0348 * `ValueSelect` should be a `FunctionObject` which takes a `ValueType` in 0349 * parameter and returns a reference to the value. `ValueSelect` should be void 0350 * if there is no value (in a set for example). 0351 * 0352 * The strong exception guarantee only holds if the expression 0353 * `std::is_nothrow_swappable<ValueType>::value && 0354 * std::is_nothrow_move_constructible<ValueType>::value` is true. 0355 * 0356 * Behaviour is undefined if the destructor of `ValueType` throws. 0357 */ 0358 template <class ValueType, class KeySelect, class ValueSelect, class Hash, 0359 class KeyEqual, class Allocator, bool StoreHash, class GrowthPolicy> 0360 class robin_hash : private Hash, private KeyEqual, private GrowthPolicy { 0361 private: 0362 template <typename U> 0363 using has_mapped_type = 0364 typename std::integral_constant<bool, !std::is_same<U, void>::value>; 0365 0366 static_assert( 0367 noexcept(std::declval<GrowthPolicy>().bucket_for_hash(std::size_t(0))), 0368 "GrowthPolicy::bucket_for_hash must be noexcept."); 0369 static_assert(noexcept(std::declval<GrowthPolicy>().clear()), 0370 "GrowthPolicy::clear must be noexcept."); 0371 0372 public: 0373 template <bool IsConst> 0374 class robin_iterator; 0375 0376 using key_type = typename KeySelect::key_type; 0377 using value_type = ValueType; 0378 using size_type = std::size_t; 0379 using difference_type = std::ptrdiff_t; 0380 using hasher = Hash; 0381 using key_equal = KeyEqual; 0382 using allocator_type = Allocator; 0383 using reference = value_type&; 0384 using const_reference = const value_type&; 0385 using pointer = value_type*; 0386 using const_pointer = const value_type*; 0387 using iterator = robin_iterator<false>; 0388 using const_iterator = robin_iterator<true>; 0389 0390 private: 0391 /** 0392 * Either store the hash because we are asked by the `StoreHash` template 0393 * parameter or store the hash because it doesn't cost us anything in size and 0394 * can be used to speed up rehash. 0395 */ 0396 static constexpr bool STORE_HASH = 0397 StoreHash || 0398 ((sizeof(tsl::detail_robin_hash::bucket_entry<value_type, true>) == 0399 sizeof(tsl::detail_robin_hash::bucket_entry<value_type, false>)) && 0400 (sizeof(std::size_t) == sizeof(truncated_hash_type) || 0401 is_power_of_two_policy<GrowthPolicy>::value) && 0402 // Don't store the hash for primitive types with default hash. 0403 (!std::is_arithmetic<key_type>::value || 0404 !std::is_same<Hash, std::hash<key_type>>::value)); 0405 0406 /** 0407 * Only use the stored hash on lookup if we are explicitly asked. We are not 0408 * sure how slow the KeyEqual operation is. An extra comparison may slow 0409 * things down with a fast KeyEqual. 0410 */ 0411 static constexpr bool USE_STORED_HASH_ON_LOOKUP = StoreHash; 0412 0413 /** 0414 * We can only use the hash on rehash if the size of the hash type is the same 0415 * as the stored one or if we use a power of two modulo. In the case of the 0416 * power of two modulo, we just mask the least significant bytes, we just have 0417 * to check that the truncated_hash_type didn't truncated more bytes. 0418 */ 0419 static bool USE_STORED_HASH_ON_REHASH(size_type bucket_count) { 0420 if (STORE_HASH && sizeof(std::size_t) == sizeof(truncated_hash_type)) { 0421 TSL_RH_UNUSED(bucket_count); 0422 return true; 0423 } else if (STORE_HASH && is_power_of_two_policy<GrowthPolicy>::value) { 0424 return bucket_count == 0 || 0425 (bucket_count - 1) <= 0426 std::numeric_limits<truncated_hash_type>::max(); 0427 } else { 0428 TSL_RH_UNUSED(bucket_count); 0429 return false; 0430 } 0431 } 0432 0433 using bucket_entry = 0434 tsl::detail_robin_hash::bucket_entry<value_type, STORE_HASH>; 0435 using distance_type = typename bucket_entry::distance_type; 0436 0437 using buckets_allocator = typename std::allocator_traits< 0438 allocator_type>::template rebind_alloc<bucket_entry>; 0439 using buckets_container_type = std::vector<bucket_entry, buckets_allocator>; 0440 0441 public: 0442 /** 0443 * The 'operator*()' and 'operator->()' methods return a const reference and 0444 * const pointer respectively to the stored value type. 0445 * 0446 * In case of a map, to get a mutable reference to the value associated to a 0447 * key (the '.second' in the stored pair), you have to call 'value()'. 0448 * 0449 * The main reason for this is that if we returned a `std::pair<Key, T>&` 0450 * instead of a `const std::pair<Key, T>&`, the user may modify the key which 0451 * will put the map in a undefined state. 0452 */ 0453 template <bool IsConst> 0454 class robin_iterator { 0455 friend class robin_hash; 0456 0457 private: 0458 using bucket_entry_ptr = 0459 typename std::conditional<IsConst, const bucket_entry*, 0460 bucket_entry*>::type; 0461 0462 robin_iterator(bucket_entry_ptr bucket) noexcept : m_bucket(bucket) {} 0463 0464 public: 0465 using iterator_category = std::forward_iterator_tag; 0466 using value_type = const typename robin_hash::value_type; 0467 using difference_type = std::ptrdiff_t; 0468 using reference = value_type&; 0469 using pointer = value_type*; 0470 0471 robin_iterator() noexcept {} 0472 0473 // Copy constructor from iterator to const_iterator. 0474 template <bool TIsConst = IsConst, 0475 typename std::enable_if<TIsConst>::type* = nullptr> 0476 robin_iterator(const robin_iterator<!TIsConst>& other) noexcept 0477 : m_bucket(other.m_bucket) {} 0478 0479 robin_iterator(const robin_iterator& other) = default; 0480 robin_iterator(robin_iterator&& other) = default; 0481 robin_iterator& operator=(const robin_iterator& other) = default; 0482 robin_iterator& operator=(robin_iterator&& other) = default; 0483 0484 const typename robin_hash::key_type& key() const { 0485 return KeySelect()(m_bucket->value()); 0486 } 0487 0488 template <class U = ValueSelect, 0489 typename std::enable_if<has_mapped_type<U>::value && 0490 IsConst>::type* = nullptr> 0491 const typename U::value_type& value() const { 0492 return U()(m_bucket->value()); 0493 } 0494 0495 template <class U = ValueSelect, 0496 typename std::enable_if<has_mapped_type<U>::value && 0497 !IsConst>::type* = nullptr> 0498 typename U::value_type& value() const { 0499 return U()(m_bucket->value()); 0500 } 0501 0502 reference operator*() const { return m_bucket->value(); } 0503 0504 pointer operator->() const { return std::addressof(m_bucket->value()); } 0505 0506 robin_iterator& operator++() { 0507 while (true) { 0508 if (m_bucket->last_bucket()) { 0509 ++m_bucket; 0510 return *this; 0511 } 0512 0513 ++m_bucket; 0514 if (!m_bucket->empty()) { 0515 return *this; 0516 } 0517 } 0518 } 0519 0520 robin_iterator operator++(int) { 0521 robin_iterator tmp(*this); 0522 ++*this; 0523 0524 return tmp; 0525 } 0526 0527 friend bool operator==(const robin_iterator& lhs, 0528 const robin_iterator& rhs) { 0529 return lhs.m_bucket == rhs.m_bucket; 0530 } 0531 0532 friend bool operator!=(const robin_iterator& lhs, 0533 const robin_iterator& rhs) { 0534 return !(lhs == rhs); 0535 } 0536 0537 private: 0538 bucket_entry_ptr m_bucket; 0539 }; 0540 0541 public: 0542 #if defined(__cplusplus) && __cplusplus >= 201402L 0543 robin_hash(size_type bucket_count, const Hash& hash, const KeyEqual& equal, 0544 const Allocator& alloc, 0545 float min_load_factor = DEFAULT_MIN_LOAD_FACTOR, 0546 float max_load_factor = DEFAULT_MAX_LOAD_FACTOR) 0547 : Hash(hash), 0548 KeyEqual(equal), 0549 GrowthPolicy(bucket_count), 0550 m_buckets_data(bucket_count, alloc), 0551 m_buckets(m_buckets_data.empty() ? static_empty_bucket_ptr() 0552 : m_buckets_data.data()), 0553 m_bucket_count(bucket_count), 0554 m_nb_elements(0), 0555 m_grow_on_next_insert(false), 0556 m_try_shrink_on_next_insert(false) { 0557 if (bucket_count > max_bucket_count()) { 0558 TSL_RH_THROW_OR_TERMINATE(std::length_error, 0559 "The map exceeds its maximum bucket count."); 0560 } 0561 0562 if (m_bucket_count > 0) { 0563 tsl_rh_assert(!m_buckets_data.empty()); 0564 m_buckets_data.back().set_as_last_bucket(); 0565 } 0566 0567 this->min_load_factor(min_load_factor); 0568 this->max_load_factor(max_load_factor); 0569 } 0570 #else 0571 /** 0572 * C++11 doesn't support the creation of a std::vector with a custom allocator 0573 * and 'count' default-inserted elements. The needed contructor `explicit 0574 * vector(size_type count, const Allocator& alloc = Allocator());` is only 0575 * available in C++14 and later. We thus must resize after using the 0576 * `vector(const Allocator& alloc)` constructor. 0577 * 0578 * We can't use `vector(size_type count, const T& value, const Allocator& 0579 * alloc)` as it requires the value T to be copyable. 0580 */ 0581 robin_hash(size_type bucket_count, const Hash& hash, const KeyEqual& equal, 0582 const Allocator& alloc, 0583 float min_load_factor = DEFAULT_MIN_LOAD_FACTOR, 0584 float max_load_factor = DEFAULT_MAX_LOAD_FACTOR) 0585 : Hash(hash), 0586 KeyEqual(equal), 0587 GrowthPolicy(bucket_count), 0588 m_buckets_data(alloc), 0589 m_buckets(static_empty_bucket_ptr()), 0590 m_bucket_count(bucket_count), 0591 m_nb_elements(0), 0592 m_grow_on_next_insert(false), 0593 m_try_shrink_on_next_insert(false) { 0594 if (bucket_count > max_bucket_count()) { 0595 TSL_RH_THROW_OR_TERMINATE(std::length_error, 0596 "The map exceeds its maximum bucket count."); 0597 } 0598 0599 if (m_bucket_count > 0) { 0600 m_buckets_data.resize(m_bucket_count); 0601 m_buckets = m_buckets_data.data(); 0602 0603 tsl_rh_assert(!m_buckets_data.empty()); 0604 m_buckets_data.back().set_as_last_bucket(); 0605 } 0606 0607 this->min_load_factor(min_load_factor); 0608 this->max_load_factor(max_load_factor); 0609 } 0610 #endif 0611 0612 robin_hash(const robin_hash& other) 0613 : Hash(other), 0614 KeyEqual(other), 0615 GrowthPolicy(other), 0616 m_buckets_data(other.m_buckets_data), 0617 m_buckets(m_buckets_data.empty() ? static_empty_bucket_ptr() 0618 : m_buckets_data.data()), 0619 m_bucket_count(other.m_bucket_count), 0620 m_nb_elements(other.m_nb_elements), 0621 m_load_threshold(other.m_load_threshold), 0622 m_min_load_factor(other.m_min_load_factor), 0623 m_max_load_factor(other.m_max_load_factor), 0624 m_grow_on_next_insert(other.m_grow_on_next_insert), 0625 m_try_shrink_on_next_insert(other.m_try_shrink_on_next_insert) {} 0626 0627 robin_hash(robin_hash&& other) noexcept( 0628 std::is_nothrow_move_constructible< 0629 Hash>::value&& std::is_nothrow_move_constructible<KeyEqual>::value&& 0630 std::is_nothrow_move_constructible<GrowthPolicy>::value&& 0631 std::is_nothrow_move_constructible<buckets_container_type>::value) 0632 : Hash(std::move(static_cast<Hash&>(other))), 0633 KeyEqual(std::move(static_cast<KeyEqual&>(other))), 0634 GrowthPolicy(std::move(static_cast<GrowthPolicy&>(other))), 0635 m_buckets_data(std::move(other.m_buckets_data)), 0636 m_buckets(m_buckets_data.empty() ? static_empty_bucket_ptr() 0637 : m_buckets_data.data()), 0638 m_bucket_count(other.m_bucket_count), 0639 m_nb_elements(other.m_nb_elements), 0640 m_load_threshold(other.m_load_threshold), 0641 m_min_load_factor(other.m_min_load_factor), 0642 m_max_load_factor(other.m_max_load_factor), 0643 m_grow_on_next_insert(other.m_grow_on_next_insert), 0644 m_try_shrink_on_next_insert(other.m_try_shrink_on_next_insert) { 0645 other.clear_and_shrink(); 0646 } 0647 0648 robin_hash& operator=(const robin_hash& other) { 0649 if (&other != this) { 0650 Hash::operator=(other); 0651 KeyEqual::operator=(other); 0652 GrowthPolicy::operator=(other); 0653 0654 m_buckets_data = other.m_buckets_data; 0655 m_buckets = m_buckets_data.empty() ? static_empty_bucket_ptr() 0656 : m_buckets_data.data(); 0657 m_bucket_count = other.m_bucket_count; 0658 m_nb_elements = other.m_nb_elements; 0659 0660 m_load_threshold = other.m_load_threshold; 0661 m_min_load_factor = other.m_min_load_factor; 0662 m_max_load_factor = other.m_max_load_factor; 0663 0664 m_grow_on_next_insert = other.m_grow_on_next_insert; 0665 m_try_shrink_on_next_insert = other.m_try_shrink_on_next_insert; 0666 } 0667 0668 return *this; 0669 } 0670 0671 robin_hash& operator=(robin_hash&& other) { 0672 other.swap(*this); 0673 other.clear_and_shrink(); 0674 0675 return *this; 0676 } 0677 0678 allocator_type get_allocator() const { 0679 return m_buckets_data.get_allocator(); 0680 } 0681 0682 /* 0683 * Iterators 0684 */ 0685 iterator begin() noexcept { 0686 std::size_t i = 0; 0687 while (i < m_bucket_count && m_buckets[i].empty()) { 0688 i++; 0689 } 0690 0691 return iterator(m_buckets + i); 0692 } 0693 0694 const_iterator begin() const noexcept { return cbegin(); } 0695 0696 const_iterator cbegin() const noexcept { 0697 std::size_t i = 0; 0698 while (i < m_bucket_count && m_buckets[i].empty()) { 0699 i++; 0700 } 0701 0702 return const_iterator(m_buckets + i); 0703 } 0704 0705 iterator end() noexcept { return iterator(m_buckets + m_bucket_count); } 0706 0707 const_iterator end() const noexcept { return cend(); } 0708 0709 const_iterator cend() const noexcept { 0710 return const_iterator(m_buckets + m_bucket_count); 0711 } 0712 0713 /* 0714 * Capacity 0715 */ 0716 bool empty() const noexcept { return m_nb_elements == 0; } 0717 0718 size_type size() const noexcept { return m_nb_elements; } 0719 0720 size_type max_size() const noexcept { return m_buckets_data.max_size(); } 0721 0722 /* 0723 * Modifiers 0724 */ 0725 void clear() noexcept { 0726 if (m_min_load_factor > 0.0f) { 0727 clear_and_shrink(); 0728 } else { 0729 for (auto& bucket : m_buckets_data) { 0730 bucket.clear(); 0731 } 0732 0733 m_nb_elements = 0; 0734 m_grow_on_next_insert = false; 0735 } 0736 } 0737 0738 template <typename P> 0739 std::pair<iterator, bool> insert(P&& value) { 0740 return insert_impl(KeySelect()(value), std::forward<P>(value)); 0741 } 0742 0743 template <typename P> 0744 iterator insert_hint(const_iterator hint, P&& value) { 0745 if (hint != cend() && 0746 compare_keys(KeySelect()(*hint), KeySelect()(value))) { 0747 return mutable_iterator(hint); 0748 } 0749 0750 return insert(std::forward<P>(value)).first; 0751 } 0752 0753 template <class InputIt> 0754 void insert(InputIt first, InputIt last) { 0755 if (std::is_base_of< 0756 std::forward_iterator_tag, 0757 typename std::iterator_traits<InputIt>::iterator_category>::value) { 0758 const auto nb_elements_insert = std::distance(first, last); 0759 const size_type nb_free_buckets = m_load_threshold - size(); 0760 tsl_rh_assert(m_load_threshold >= size()); 0761 0762 if (nb_elements_insert > 0 && 0763 nb_free_buckets < size_type(nb_elements_insert)) { 0764 reserve(size() + size_type(nb_elements_insert)); 0765 } 0766 } 0767 0768 for (; first != last; ++first) { 0769 insert(*first); 0770 } 0771 } 0772 0773 template <class K, class M> 0774 std::pair<iterator, bool> insert_or_assign(K&& key, M&& obj) { 0775 auto it = try_emplace(std::forward<K>(key), std::forward<M>(obj)); 0776 if (!it.second) { 0777 it.first.value() = std::forward<M>(obj); 0778 } 0779 0780 return it; 0781 } 0782 0783 template <class K, class M> 0784 iterator insert_or_assign(const_iterator hint, K&& key, M&& obj) { 0785 if (hint != cend() && compare_keys(KeySelect()(*hint), key)) { 0786 auto it = mutable_iterator(hint); 0787 it.value() = std::forward<M>(obj); 0788 0789 return it; 0790 } 0791 0792 return insert_or_assign(std::forward<K>(key), std::forward<M>(obj)).first; 0793 } 0794 0795 template <class... Args> 0796 std::pair<iterator, bool> emplace(Args&&... args) { 0797 return insert(value_type(std::forward<Args>(args)...)); 0798 } 0799 0800 template <class... Args> 0801 iterator emplace_hint(const_iterator hint, Args&&... args) { 0802 return insert_hint(hint, value_type(std::forward<Args>(args)...)); 0803 } 0804 0805 template <class K, class... Args> 0806 std::pair<iterator, bool> try_emplace(K&& key, Args&&... args) { 0807 return insert_impl(key, std::piecewise_construct, 0808 std::forward_as_tuple(std::forward<K>(key)), 0809 std::forward_as_tuple(std::forward<Args>(args)...)); 0810 } 0811 0812 template <class K, class... Args> 0813 iterator try_emplace_hint(const_iterator hint, K&& key, Args&&... args) { 0814 if (hint != cend() && compare_keys(KeySelect()(*hint), key)) { 0815 return mutable_iterator(hint); 0816 } 0817 0818 return try_emplace(std::forward<K>(key), std::forward<Args>(args)...).first; 0819 } 0820 0821 /** 0822 * Here to avoid `template<class K> size_type erase(const K& key)` being used 0823 * when we use an `iterator` instead of a `const_iterator`. 0824 */ 0825 iterator erase(iterator pos) { 0826 erase_from_bucket(pos); 0827 0828 /** 0829 * Erase bucket used a backward shift after clearing the bucket. 0830 * Check if there is a new value in the bucket, if not get the next 0831 * non-empty. 0832 */ 0833 if (pos.m_bucket->empty()) { 0834 ++pos; 0835 } 0836 0837 m_try_shrink_on_next_insert = true; 0838 0839 return pos; 0840 } 0841 0842 iterator erase(const_iterator pos) { return erase(mutable_iterator(pos)); } 0843 0844 iterator erase(const_iterator first, const_iterator last) { 0845 if (first == last) { 0846 return mutable_iterator(first); 0847 } 0848 0849 auto first_mutable = mutable_iterator(first); 0850 auto last_mutable = mutable_iterator(last); 0851 for (auto it = first_mutable.m_bucket; it != last_mutable.m_bucket; ++it) { 0852 if (!it->empty()) { 0853 it->clear(); 0854 m_nb_elements--; 0855 } 0856 } 0857 0858 if (last_mutable == end()) { 0859 m_try_shrink_on_next_insert = true; 0860 return end(); 0861 } 0862 0863 /* 0864 * Backward shift on the values which come after the deleted values. 0865 * We try to move the values closer to their ideal bucket. 0866 */ 0867 std::size_t icloser_bucket = 0868 static_cast<std::size_t>(first_mutable.m_bucket - m_buckets); 0869 std::size_t ito_move_closer_value = 0870 static_cast<std::size_t>(last_mutable.m_bucket - m_buckets); 0871 tsl_rh_assert(ito_move_closer_value > icloser_bucket); 0872 0873 const std::size_t ireturn_bucket = 0874 ito_move_closer_value - 0875 std::min( 0876 ito_move_closer_value - icloser_bucket, 0877 std::size_t( 0878 m_buckets[ito_move_closer_value].dist_from_ideal_bucket())); 0879 0880 while (ito_move_closer_value < m_bucket_count && 0881 m_buckets[ito_move_closer_value].dist_from_ideal_bucket() > 0) { 0882 icloser_bucket = 0883 ito_move_closer_value - 0884 std::min( 0885 ito_move_closer_value - icloser_bucket, 0886 std::size_t( 0887 m_buckets[ito_move_closer_value].dist_from_ideal_bucket())); 0888 0889 tsl_rh_assert(m_buckets[icloser_bucket].empty()); 0890 const distance_type new_distance = distance_type( 0891 m_buckets[ito_move_closer_value].dist_from_ideal_bucket() - 0892 (ito_move_closer_value - icloser_bucket)); 0893 m_buckets[icloser_bucket].set_value_of_empty_bucket( 0894 new_distance, m_buckets[ito_move_closer_value].truncated_hash(), 0895 std::move(m_buckets[ito_move_closer_value].value())); 0896 m_buckets[ito_move_closer_value].clear(); 0897 0898 ++icloser_bucket; 0899 ++ito_move_closer_value; 0900 } 0901 0902 m_try_shrink_on_next_insert = true; 0903 0904 return iterator(m_buckets + ireturn_bucket); 0905 } 0906 0907 template <class K> 0908 size_type erase(const K& key) { 0909 return erase(key, hash_key(key)); 0910 } 0911 0912 template <class K> 0913 size_type erase(const K& key, std::size_t hash) { 0914 auto it = find(key, hash); 0915 if (it != end()) { 0916 erase_from_bucket(it); 0917 m_try_shrink_on_next_insert = true; 0918 0919 return 1; 0920 } else { 0921 return 0; 0922 } 0923 } 0924 0925 void swap(robin_hash& other) { 0926 using std::swap; 0927 0928 swap(static_cast<Hash&>(*this), static_cast<Hash&>(other)); 0929 swap(static_cast<KeyEqual&>(*this), static_cast<KeyEqual&>(other)); 0930 swap(static_cast<GrowthPolicy&>(*this), static_cast<GrowthPolicy&>(other)); 0931 swap(m_buckets_data, other.m_buckets_data); 0932 swap(m_buckets, other.m_buckets); 0933 swap(m_bucket_count, other.m_bucket_count); 0934 swap(m_nb_elements, other.m_nb_elements); 0935 swap(m_load_threshold, other.m_load_threshold); 0936 swap(m_min_load_factor, other.m_min_load_factor); 0937 swap(m_max_load_factor, other.m_max_load_factor); 0938 swap(m_grow_on_next_insert, other.m_grow_on_next_insert); 0939 swap(m_try_shrink_on_next_insert, other.m_try_shrink_on_next_insert); 0940 } 0941 0942 /* 0943 * Lookup 0944 */ 0945 template <class K, class U = ValueSelect, 0946 typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr> 0947 typename U::value_type& at(const K& key) { 0948 return at(key, hash_key(key)); 0949 } 0950 0951 template <class K, class U = ValueSelect, 0952 typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr> 0953 typename U::value_type& at(const K& key, std::size_t hash) { 0954 return const_cast<typename U::value_type&>( 0955 static_cast<const robin_hash*>(this)->at(key, hash)); 0956 } 0957 0958 template <class K, class U = ValueSelect, 0959 typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr> 0960 const typename U::value_type& at(const K& key) const { 0961 return at(key, hash_key(key)); 0962 } 0963 0964 template <class K, class U = ValueSelect, 0965 typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr> 0966 const typename U::value_type& at(const K& key, std::size_t hash) const { 0967 auto it = find(key, hash); 0968 if (it != cend()) { 0969 return it.value(); 0970 } else { 0971 TSL_RH_THROW_OR_TERMINATE(std::out_of_range, "Couldn't find key."); 0972 } 0973 } 0974 0975 template <class K, class U = ValueSelect, 0976 typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr> 0977 typename U::value_type& operator[](K&& key) { 0978 return try_emplace(std::forward<K>(key)).first.value(); 0979 } 0980 0981 template <class K> 0982 size_type count(const K& key) const { 0983 return count(key, hash_key(key)); 0984 } 0985 0986 template <class K> 0987 size_type count(const K& key, std::size_t hash) const { 0988 if (find(key, hash) != cend()) { 0989 return 1; 0990 } else { 0991 return 0; 0992 } 0993 } 0994 0995 template <class K> 0996 iterator find(const K& key) { 0997 return find_impl(key, hash_key(key)); 0998 } 0999 1000 template <class K> 1001 iterator find(const K& key, std::size_t hash) { 1002 return find_impl(key, hash); 1003 } 1004 1005 template <class K> 1006 const_iterator find(const K& key) const { 1007 return find_impl(key, hash_key(key)); 1008 } 1009 1010 template <class K> 1011 const_iterator find(const K& key, std::size_t hash) const { 1012 return find_impl(key, hash); 1013 } 1014 1015 template <class K> 1016 bool contains(const K& key) const { 1017 return contains(key, hash_key(key)); 1018 } 1019 1020 template <class K> 1021 bool contains(const K& key, std::size_t hash) const { 1022 return count(key, hash) != 0; 1023 } 1024 1025 template <class K> 1026 std::pair<iterator, iterator> equal_range(const K& key) { 1027 return equal_range(key, hash_key(key)); 1028 } 1029 1030 template <class K> 1031 std::pair<iterator, iterator> equal_range(const K& key, std::size_t hash) { 1032 iterator it = find(key, hash); 1033 return std::make_pair(it, (it == end()) ? it : std::next(it)); 1034 } 1035 1036 template <class K> 1037 std::pair<const_iterator, const_iterator> equal_range(const K& key) const { 1038 return equal_range(key, hash_key(key)); 1039 } 1040 1041 template <class K> 1042 std::pair<const_iterator, const_iterator> equal_range( 1043 const K& key, std::size_t hash) const { 1044 const_iterator it = find(key, hash); 1045 return std::make_pair(it, (it == cend()) ? it : std::next(it)); 1046 } 1047 1048 /* 1049 * Bucket interface 1050 */ 1051 size_type bucket_count() const { return m_bucket_count; } 1052 1053 size_type max_bucket_count() const { 1054 return std::min(GrowthPolicy::max_bucket_count(), 1055 m_buckets_data.max_size()); 1056 } 1057 1058 /* 1059 * Hash policy 1060 */ 1061 float load_factor() const { 1062 if (bucket_count() == 0) { 1063 return 0; 1064 } 1065 1066 return float(m_nb_elements) / float(bucket_count()); 1067 } 1068 1069 float min_load_factor() const { return m_min_load_factor; } 1070 1071 float max_load_factor() const { return m_max_load_factor; } 1072 1073 void min_load_factor(float ml) { 1074 m_min_load_factor = clamp(ml, float(MINIMUM_MIN_LOAD_FACTOR), 1075 float(MAXIMUM_MIN_LOAD_FACTOR)); 1076 } 1077 1078 void max_load_factor(float ml) { 1079 m_max_load_factor = clamp(ml, float(MINIMUM_MAX_LOAD_FACTOR), 1080 float(MAXIMUM_MAX_LOAD_FACTOR)); 1081 m_load_threshold = size_type(float(bucket_count()) * m_max_load_factor); 1082 tsl_rh_assert(bucket_count() == 0 || m_load_threshold < bucket_count()); 1083 } 1084 1085 void rehash(size_type count_) { 1086 count_ = std::max(count_, 1087 size_type(std::ceil(float(size()) / max_load_factor()))); 1088 rehash_impl(count_); 1089 } 1090 1091 void reserve(size_type count_) { 1092 rehash(size_type(std::ceil(float(count_) / max_load_factor()))); 1093 } 1094 1095 /* 1096 * Observers 1097 */ 1098 hasher hash_function() const { return static_cast<const Hash&>(*this); } 1099 1100 key_equal key_eq() const { return static_cast<const KeyEqual&>(*this); } 1101 1102 /* 1103 * Other 1104 */ 1105 iterator mutable_iterator(const_iterator pos) { 1106 return iterator(const_cast<bucket_entry*>(pos.m_bucket)); 1107 } 1108 1109 template <class Serializer> 1110 void serialize(Serializer& serializer) const { 1111 serialize_impl(serializer); 1112 } 1113 1114 template <class Deserializer> 1115 void deserialize(Deserializer& deserializer, bool hash_compatible) { 1116 deserialize_impl(deserializer, hash_compatible); 1117 } 1118 1119 private: 1120 template <class K> 1121 std::size_t hash_key(const K& key) const { 1122 return Hash::operator()(key); 1123 } 1124 1125 template <class K1, class K2> 1126 bool compare_keys(const K1& key1, const K2& key2) const { 1127 return KeyEqual::operator()(key1, key2); 1128 } 1129 1130 std::size_t bucket_for_hash(std::size_t hash) const { 1131 const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash); 1132 tsl_rh_assert(bucket < m_bucket_count || 1133 (bucket == 0 && m_bucket_count == 0)); 1134 1135 return bucket; 1136 } 1137 1138 template <class U = GrowthPolicy, 1139 typename std::enable_if<is_power_of_two_policy<U>::value>::type* = 1140 nullptr> 1141 std::size_t next_bucket(std::size_t index) const noexcept { 1142 tsl_rh_assert(index < bucket_count()); 1143 1144 return (index + 1) & this->m_mask; 1145 } 1146 1147 template <class U = GrowthPolicy, 1148 typename std::enable_if<!is_power_of_two_policy<U>::value>::type* = 1149 nullptr> 1150 std::size_t next_bucket(std::size_t index) const noexcept { 1151 tsl_rh_assert(index < bucket_count()); 1152 1153 index++; 1154 return (index != bucket_count()) ? index : 0; 1155 } 1156 1157 template <class K> 1158 iterator find_impl(const K& key, std::size_t hash) { 1159 return mutable_iterator( 1160 static_cast<const robin_hash*>(this)->find(key, hash)); 1161 } 1162 1163 template <class K> 1164 const_iterator find_impl(const K& key, std::size_t hash) const { 1165 std::size_t ibucket = bucket_for_hash(hash); 1166 distance_type dist_from_ideal_bucket = 0; 1167 1168 while (dist_from_ideal_bucket <= 1169 m_buckets[ibucket].dist_from_ideal_bucket()) { 1170 if (TSL_RH_LIKELY( 1171 (!USE_STORED_HASH_ON_LOOKUP || 1172 m_buckets[ibucket].bucket_hash_equal(hash)) && 1173 compare_keys(KeySelect()(m_buckets[ibucket].value()), key))) { 1174 return const_iterator(m_buckets + ibucket); 1175 } 1176 1177 ibucket = next_bucket(ibucket); 1178 dist_from_ideal_bucket++; 1179 } 1180 1181 return cend(); 1182 } 1183 1184 void erase_from_bucket(iterator pos) { 1185 pos.m_bucket->clear(); 1186 m_nb_elements--; 1187 1188 /** 1189 * Backward shift, swap the empty bucket, previous_ibucket, with the values 1190 * on its right, ibucket, until we cross another empty bucket or if the 1191 * other bucket has a distance_from_ideal_bucket == 0. 1192 * 1193 * We try to move the values closer to their ideal bucket. 1194 */ 1195 std::size_t previous_ibucket = 1196 static_cast<std::size_t>(pos.m_bucket - m_buckets); 1197 std::size_t ibucket = next_bucket(previous_ibucket); 1198 1199 while (m_buckets[ibucket].dist_from_ideal_bucket() > 0) { 1200 tsl_rh_assert(m_buckets[previous_ibucket].empty()); 1201 1202 const distance_type new_distance = 1203 distance_type(m_buckets[ibucket].dist_from_ideal_bucket() - 1); 1204 m_buckets[previous_ibucket].set_value_of_empty_bucket( 1205 new_distance, m_buckets[ibucket].truncated_hash(), 1206 std::move(m_buckets[ibucket].value())); 1207 m_buckets[ibucket].clear(); 1208 1209 previous_ibucket = ibucket; 1210 ibucket = next_bucket(ibucket); 1211 } 1212 } 1213 1214 template <class K, class... Args> 1215 std::pair<iterator, bool> insert_impl(const K& key, 1216 Args&&... value_type_args) { 1217 const std::size_t hash = hash_key(key); 1218 1219 std::size_t ibucket = bucket_for_hash(hash); 1220 distance_type dist_from_ideal_bucket = 0; 1221 1222 while (dist_from_ideal_bucket <= 1223 m_buckets[ibucket].dist_from_ideal_bucket()) { 1224 if ((!USE_STORED_HASH_ON_LOOKUP || 1225 m_buckets[ibucket].bucket_hash_equal(hash)) && 1226 compare_keys(KeySelect()(m_buckets[ibucket].value()), key)) { 1227 return std::make_pair(iterator(m_buckets + ibucket), false); 1228 } 1229 1230 ibucket = next_bucket(ibucket); 1231 dist_from_ideal_bucket++; 1232 } 1233 1234 while (rehash_on_extreme_load(dist_from_ideal_bucket)) { 1235 ibucket = bucket_for_hash(hash); 1236 dist_from_ideal_bucket = 0; 1237 1238 while (dist_from_ideal_bucket <= 1239 m_buckets[ibucket].dist_from_ideal_bucket()) { 1240 ibucket = next_bucket(ibucket); 1241 dist_from_ideal_bucket++; 1242 } 1243 } 1244 1245 if (m_buckets[ibucket].empty()) { 1246 m_buckets[ibucket].set_value_of_empty_bucket( 1247 dist_from_ideal_bucket, bucket_entry::truncate_hash(hash), 1248 std::forward<Args>(value_type_args)...); 1249 } else { 1250 insert_value(ibucket, dist_from_ideal_bucket, 1251 bucket_entry::truncate_hash(hash), 1252 std::forward<Args>(value_type_args)...); 1253 } 1254 1255 m_nb_elements++; 1256 /* 1257 * The value will be inserted in ibucket in any case, either because it was 1258 * empty or by stealing the bucket (robin hood). 1259 */ 1260 return std::make_pair(iterator(m_buckets + ibucket), true); 1261 } 1262 1263 template <class... Args> 1264 void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket, 1265 truncated_hash_type hash, Args&&... value_type_args) { 1266 value_type value(std::forward<Args>(value_type_args)...); 1267 insert_value_impl(ibucket, dist_from_ideal_bucket, hash, value); 1268 } 1269 1270 void insert_value(std::size_t ibucket, distance_type dist_from_ideal_bucket, 1271 truncated_hash_type hash, value_type&& value) { 1272 insert_value_impl(ibucket, dist_from_ideal_bucket, hash, value); 1273 } 1274 1275 /* 1276 * We don't use `value_type&& value` as last argument due to a bug in MSVC 1277 * when `value_type` is a pointer, The compiler is not able to see the 1278 * difference between `std::string*` and `std::string*&&` resulting in a 1279 * compilation error. 1280 * 1281 * The `value` will be in a moved state at the end of the function. 1282 */ 1283 void insert_value_impl(std::size_t ibucket, 1284 distance_type dist_from_ideal_bucket, 1285 truncated_hash_type hash, value_type& value) { 1286 tsl_rh_assert(dist_from_ideal_bucket > 1287 m_buckets[ibucket].dist_from_ideal_bucket()); 1288 m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, hash, 1289 value); 1290 ibucket = next_bucket(ibucket); 1291 dist_from_ideal_bucket++; 1292 1293 while (!m_buckets[ibucket].empty()) { 1294 if (dist_from_ideal_bucket > 1295 m_buckets[ibucket].dist_from_ideal_bucket()) { 1296 if (dist_from_ideal_bucket > 1297 bucket_entry::DIST_FROM_IDEAL_BUCKET_LIMIT) { 1298 /** 1299 * The number of probes is really high, rehash the map on the next 1300 * insert. Difficult to do now as rehash may throw an exception. 1301 */ 1302 m_grow_on_next_insert = true; 1303 } 1304 1305 m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, 1306 hash, value); 1307 } 1308 1309 ibucket = next_bucket(ibucket); 1310 dist_from_ideal_bucket++; 1311 } 1312 1313 m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, hash, 1314 std::move(value)); 1315 } 1316 1317 void rehash_impl(size_type count_) { 1318 robin_hash new_table(count_, static_cast<Hash&>(*this), 1319 static_cast<KeyEqual&>(*this), get_allocator(), 1320 m_min_load_factor, m_max_load_factor); 1321 tsl_rh_assert(size() <= new_table.m_load_threshold); 1322 1323 const bool use_stored_hash = 1324 USE_STORED_HASH_ON_REHASH(new_table.bucket_count()); 1325 for (auto& bucket : m_buckets_data) { 1326 if (bucket.empty()) { 1327 continue; 1328 } 1329 1330 const std::size_t hash = 1331 use_stored_hash ? bucket.truncated_hash() 1332 : new_table.hash_key(KeySelect()(bucket.value())); 1333 1334 new_table.insert_value_on_rehash(new_table.bucket_for_hash(hash), 0, 1335 bucket_entry::truncate_hash(hash), 1336 std::move(bucket.value())); 1337 } 1338 1339 new_table.m_nb_elements = m_nb_elements; 1340 new_table.swap(*this); 1341 } 1342 1343 void clear_and_shrink() noexcept { 1344 GrowthPolicy::clear(); 1345 m_buckets_data.clear(); 1346 m_buckets = static_empty_bucket_ptr(); 1347 m_bucket_count = 0; 1348 m_nb_elements = 0; 1349 m_load_threshold = 0; 1350 m_grow_on_next_insert = false; 1351 m_try_shrink_on_next_insert = false; 1352 } 1353 1354 void insert_value_on_rehash(std::size_t ibucket, 1355 distance_type dist_from_ideal_bucket, 1356 truncated_hash_type hash, value_type&& value) { 1357 while (true) { 1358 if (dist_from_ideal_bucket > 1359 m_buckets[ibucket].dist_from_ideal_bucket()) { 1360 if (m_buckets[ibucket].empty()) { 1361 m_buckets[ibucket].set_value_of_empty_bucket(dist_from_ideal_bucket, 1362 hash, std::move(value)); 1363 return; 1364 } else { 1365 m_buckets[ibucket].swap_with_value_in_bucket(dist_from_ideal_bucket, 1366 hash, value); 1367 } 1368 } 1369 1370 dist_from_ideal_bucket++; 1371 ibucket = next_bucket(ibucket); 1372 } 1373 } 1374 1375 /** 1376 * Grow the table if m_grow_on_next_insert is true or we reached the 1377 * max_load_factor. Shrink the table if m_try_shrink_on_next_insert is true 1378 * (an erase occurred) and we're below the min_load_factor. 1379 * 1380 * Return true if the table has been rehashed. 1381 */ 1382 bool rehash_on_extreme_load(distance_type curr_dist_from_ideal_bucket) { 1383 if (m_grow_on_next_insert || 1384 curr_dist_from_ideal_bucket > 1385 bucket_entry::DIST_FROM_IDEAL_BUCKET_LIMIT || 1386 size() >= m_load_threshold) { 1387 rehash_impl(GrowthPolicy::next_bucket_count()); 1388 m_grow_on_next_insert = false; 1389 1390 return true; 1391 } 1392 1393 if (m_try_shrink_on_next_insert) { 1394 m_try_shrink_on_next_insert = false; 1395 if (m_min_load_factor != 0.0f && load_factor() < m_min_load_factor) { 1396 reserve(size() + 1); 1397 1398 return true; 1399 } 1400 } 1401 1402 return false; 1403 } 1404 1405 template <class Serializer> 1406 void serialize_impl(Serializer& serializer) const { 1407 const slz_size_type version = SERIALIZATION_PROTOCOL_VERSION; 1408 serializer(version); 1409 1410 // Indicate if the truncated hash of each bucket is stored. Use a 1411 // std::int16_t instead of a bool to avoid the need for the serializer to 1412 // support an extra 'bool' type. 1413 const std::int16_t hash_stored_for_bucket = 1414 static_cast<std::int16_t>(STORE_HASH); 1415 serializer(hash_stored_for_bucket); 1416 1417 const slz_size_type nb_elements = m_nb_elements; 1418 serializer(nb_elements); 1419 1420 const slz_size_type bucket_count = m_buckets_data.size(); 1421 serializer(bucket_count); 1422 1423 const float min_load_factor = m_min_load_factor; 1424 serializer(min_load_factor); 1425 1426 const float max_load_factor = m_max_load_factor; 1427 serializer(max_load_factor); 1428 1429 for (const bucket_entry& bucket : m_buckets_data) { 1430 if (bucket.empty()) { 1431 const std::int16_t empty_bucket = 1432 bucket_entry::EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET; 1433 serializer(empty_bucket); 1434 } else { 1435 const std::int16_t dist_from_ideal_bucket = 1436 bucket.dist_from_ideal_bucket(); 1437 serializer(dist_from_ideal_bucket); 1438 if (STORE_HASH) { 1439 const std::uint32_t truncated_hash = bucket.truncated_hash(); 1440 serializer(truncated_hash); 1441 } 1442 serializer(bucket.value()); 1443 } 1444 } 1445 } 1446 1447 template <class Deserializer> 1448 void deserialize_impl(Deserializer& deserializer, bool hash_compatible) { 1449 tsl_rh_assert(m_buckets_data.empty()); // Current hash table must be empty 1450 1451 const slz_size_type version = 1452 deserialize_value<slz_size_type>(deserializer); 1453 // For now we only have one version of the serialization protocol. 1454 // If it doesn't match there is a problem with the file. 1455 if (version != SERIALIZATION_PROTOCOL_VERSION) { 1456 TSL_RH_THROW_OR_TERMINATE(std::runtime_error, 1457 "Can't deserialize the ordered_map/set. " 1458 "The protocol version header is invalid."); 1459 } 1460 1461 const bool hash_stored_for_bucket = 1462 deserialize_value<std::int16_t>(deserializer) ? true : false; 1463 if (hash_compatible && STORE_HASH != hash_stored_for_bucket) { 1464 TSL_RH_THROW_OR_TERMINATE( 1465 std::runtime_error, 1466 "Can't deserialize a map with a different StoreHash " 1467 "than the one used during the serialization when " 1468 "hash compatibility is used"); 1469 } 1470 1471 const slz_size_type nb_elements = 1472 deserialize_value<slz_size_type>(deserializer); 1473 const slz_size_type bucket_count_ds = 1474 deserialize_value<slz_size_type>(deserializer); 1475 const float min_load_factor = deserialize_value<float>(deserializer); 1476 const float max_load_factor = deserialize_value<float>(deserializer); 1477 1478 if (min_load_factor < MINIMUM_MIN_LOAD_FACTOR || 1479 min_load_factor > MAXIMUM_MIN_LOAD_FACTOR) { 1480 TSL_RH_THROW_OR_TERMINATE( 1481 std::runtime_error, 1482 "Invalid min_load_factor. Check that the serializer " 1483 "and deserializer support floats correctly as they " 1484 "can be converted implicitly to ints."); 1485 } 1486 1487 if (max_load_factor < MINIMUM_MAX_LOAD_FACTOR || 1488 max_load_factor > MAXIMUM_MAX_LOAD_FACTOR) { 1489 TSL_RH_THROW_OR_TERMINATE( 1490 std::runtime_error, 1491 "Invalid max_load_factor. Check that the serializer " 1492 "and deserializer support floats correctly as they " 1493 "can be converted implicitly to ints."); 1494 } 1495 1496 this->min_load_factor(min_load_factor); 1497 this->max_load_factor(max_load_factor); 1498 1499 if (bucket_count_ds == 0) { 1500 tsl_rh_assert(nb_elements == 0); 1501 return; 1502 } 1503 1504 if (!hash_compatible) { 1505 reserve(numeric_cast<size_type>(nb_elements, 1506 "Deserialized nb_elements is too big.")); 1507 for (slz_size_type ibucket = 0; ibucket < bucket_count_ds; ibucket++) { 1508 const distance_type dist_from_ideal_bucket = 1509 deserialize_value<std::int16_t>(deserializer); 1510 if (dist_from_ideal_bucket != 1511 bucket_entry::EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET) { 1512 if (hash_stored_for_bucket) { 1513 TSL_RH_UNUSED(deserialize_value<std::uint32_t>(deserializer)); 1514 } 1515 1516 insert(deserialize_value<value_type>(deserializer)); 1517 } 1518 } 1519 1520 tsl_rh_assert(nb_elements == size()); 1521 } else { 1522 m_bucket_count = numeric_cast<size_type>( 1523 bucket_count_ds, "Deserialized bucket_count is too big."); 1524 1525 GrowthPolicy::operator=(GrowthPolicy(m_bucket_count)); 1526 // GrowthPolicy should not modify the bucket count we got from 1527 // deserialization 1528 if (m_bucket_count != bucket_count_ds) { 1529 TSL_RH_THROW_OR_TERMINATE(std::runtime_error, 1530 "The GrowthPolicy is not the same even " 1531 "though hash_compatible is true."); 1532 } 1533 1534 m_nb_elements = numeric_cast<size_type>( 1535 nb_elements, "Deserialized nb_elements is too big."); 1536 m_buckets_data.resize(m_bucket_count); 1537 m_buckets = m_buckets_data.data(); 1538 1539 for (bucket_entry& bucket : m_buckets_data) { 1540 const distance_type dist_from_ideal_bucket = 1541 deserialize_value<std::int16_t>(deserializer); 1542 if (dist_from_ideal_bucket != 1543 bucket_entry::EMPTY_MARKER_DIST_FROM_IDEAL_BUCKET) { 1544 truncated_hash_type truncated_hash = 0; 1545 if (hash_stored_for_bucket) { 1546 tsl_rh_assert(hash_stored_for_bucket); 1547 truncated_hash = deserialize_value<std::uint32_t>(deserializer); 1548 } 1549 1550 bucket.set_value_of_empty_bucket( 1551 dist_from_ideal_bucket, truncated_hash, 1552 deserialize_value<value_type>(deserializer)); 1553 } 1554 } 1555 1556 if (!m_buckets_data.empty()) { 1557 m_buckets_data.back().set_as_last_bucket(); 1558 } 1559 } 1560 } 1561 1562 public: 1563 static const size_type DEFAULT_INIT_BUCKETS_SIZE = 0; 1564 1565 static constexpr float DEFAULT_MAX_LOAD_FACTOR = 0.5f; 1566 static constexpr float MINIMUM_MAX_LOAD_FACTOR = 0.2f; 1567 static constexpr float MAXIMUM_MAX_LOAD_FACTOR = 0.95f; 1568 1569 static constexpr float DEFAULT_MIN_LOAD_FACTOR = 0.0f; 1570 static constexpr float MINIMUM_MIN_LOAD_FACTOR = 0.0f; 1571 static constexpr float MAXIMUM_MIN_LOAD_FACTOR = 0.15f; 1572 1573 static_assert(MINIMUM_MAX_LOAD_FACTOR < MAXIMUM_MAX_LOAD_FACTOR, 1574 "MINIMUM_MAX_LOAD_FACTOR should be < MAXIMUM_MAX_LOAD_FACTOR"); 1575 static_assert(MINIMUM_MIN_LOAD_FACTOR < MAXIMUM_MIN_LOAD_FACTOR, 1576 "MINIMUM_MIN_LOAD_FACTOR should be < MAXIMUM_MIN_LOAD_FACTOR"); 1577 static_assert(MAXIMUM_MIN_LOAD_FACTOR < MINIMUM_MAX_LOAD_FACTOR, 1578 "MAXIMUM_MIN_LOAD_FACTOR should be < MINIMUM_MAX_LOAD_FACTOR"); 1579 1580 private: 1581 /** 1582 * Protocol version currenlty used for serialization. 1583 */ 1584 static const slz_size_type SERIALIZATION_PROTOCOL_VERSION = 1; 1585 1586 /** 1587 * Return an always valid pointer to an static empty bucket_entry with 1588 * last_bucket() == true. 1589 */ 1590 bucket_entry* static_empty_bucket_ptr() noexcept { 1591 static bucket_entry empty_bucket(true); 1592 tsl_rh_assert(empty_bucket.empty()); 1593 return &empty_bucket; 1594 } 1595 1596 private: 1597 buckets_container_type m_buckets_data; 1598 1599 /** 1600 * Points to m_buckets_data.data() if !m_buckets_data.empty() otherwise points 1601 * to static_empty_bucket_ptr. This variable is useful to avoid the cost of 1602 * checking if m_buckets_data is empty when trying to find an element. 1603 * 1604 * TODO Remove m_buckets_data and only use a pointer instead of a 1605 * pointer+vector to save some space in the robin_hash object. Manage the 1606 * Allocator manually. 1607 */ 1608 bucket_entry* m_buckets; 1609 1610 /** 1611 * Used a lot in find, avoid the call to m_buckets_data.size() which is a bit 1612 * slower. 1613 */ 1614 size_type m_bucket_count; 1615 1616 size_type m_nb_elements; 1617 1618 size_type m_load_threshold; 1619 1620 float m_min_load_factor; 1621 float m_max_load_factor; 1622 1623 bool m_grow_on_next_insert; 1624 1625 /** 1626 * We can't shrink down the map on erase operations as the erase methods need 1627 * to return the next iterator. Shrinking the map would invalidate all the 1628 * iterators and we could not return the next iterator in a meaningful way, On 1629 * erase, we thus just indicate on erase that we should try to shrink the hash 1630 * table on the next insert if we go below the min_load_factor. 1631 */ 1632 bool m_try_shrink_on_next_insert; 1633 }; 1634 1635 } // namespace detail_robin_hash 1636 1637 } // namespace tsl 1638 1639 #endif