File indexing completed on 2025-04-27 13:07:37
0001 /* 0002 * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved 0003 * 0004 * This library is free software; you can redistribute it and/or 0005 * modify it under the terms of the GNU Library General Public 0006 * License as published by the Free Software Foundation; either 0007 * version 2 of the License, or (at your option) any later version. 0008 * 0009 * This library is distributed in the hope that it will be useful, 0010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0012 * Library General Public License for more details. 0013 * 0014 * You should have received a copy of the GNU Library General Public License 0015 * along with this library; see the file COPYING.LIB. If not, write to 0016 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 0017 * Boston, MA 02110-1301, USA. 0018 * 0019 */ 0020 0021 #ifndef StringHash_h 0022 #define StringHash_h 0023 0024 #include "AtomicStringImpl.h" 0025 #include "dom/dom_string.h" 0026 #include "xml/dom_stringimpl.h" 0027 #include <wtf/HashTraits.h> 0028 0029 using DOM::DOMString; 0030 using DOM::DOMStringImpl; 0031 0032 namespace khtml 0033 { 0034 0035 // FIXME: We should really figure out a way to put the computeHash function that's 0036 // currently a member function of DOMStringImpl into this file so we can be a little 0037 // closer to having all the nearly-identical hash functions in one place. 0038 0039 struct StringHash { 0040 static unsigned hash(DOMStringImpl *key) 0041 { 0042 return key->hash(); 0043 } 0044 static bool equal(DOMStringImpl *a, DOMStringImpl *b) 0045 { 0046 if (a == b) { 0047 return true; 0048 } 0049 if (!a || !b) { 0050 return false; 0051 } 0052 0053 unsigned aLength = a->length(); 0054 unsigned bLength = b->length(); 0055 if (aLength != bLength) { 0056 return false; 0057 } 0058 0059 const uint32_t *aChars = reinterpret_cast<const uint32_t *>(a->unicode()); 0060 const uint32_t *bChars = reinterpret_cast<const uint32_t *>(b->unicode()); 0061 0062 unsigned halfLength = aLength >> 1; 0063 for (unsigned i = 0; i != halfLength; ++i) 0064 if (*aChars++ != *bChars++) { 0065 return false; 0066 } 0067 0068 if (aLength & 1 && *reinterpret_cast<const uint16_t *>(aChars) != *reinterpret_cast<const uint16_t *>(bChars)) { 0069 return false; 0070 } 0071 0072 return true; 0073 } 0074 0075 static unsigned hash(const RefPtr<DOMStringImpl> &key) 0076 { 0077 return key->hash(); 0078 } 0079 static bool equal(const RefPtr<DOMStringImpl> &a, const RefPtr<DOMStringImpl> &b) 0080 { 0081 return equal(a.get(), b.get()); 0082 } 0083 0084 static unsigned hash(const DOMString &key) 0085 { 0086 return key.implementation()->hash(); 0087 } 0088 static bool equal(const DOMString &a, const DOMString &b) 0089 { 0090 return equal(a.implementation(), b.implementation()); 0091 } 0092 0093 static const bool safeToCompareToEmptyOrDeleted = false; 0094 }; 0095 0096 class CaseFoldingHash 0097 { 0098 private: 0099 // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's 0100 static const unsigned PHI = 0x9e3779b9U; 0101 public: 0102 // Paul Hsieh's SuperFastHash 0103 // http://www.azillionmonkeys.com/qed/hash.html 0104 static unsigned hash(const QChar *data, unsigned length) 0105 { 0106 unsigned l = length; 0107 const QChar *s = data; 0108 uint32_t hash = PHI; 0109 uint32_t tmp; 0110 0111 int rem = l & 1; 0112 l >>= 1; 0113 0114 // Main loop. 0115 for (; l > 0; l--) { 0116 hash += s[0].toCaseFolded().unicode(); 0117 tmp = (s[1].toCaseFolded().unicode() << 11) ^ hash; 0118 hash = (hash << 16) ^ tmp; 0119 s += 2; 0120 hash += hash >> 11; 0121 } 0122 0123 // Handle end case. 0124 if (rem) { 0125 hash += s[0].toCaseFolded().unicode(); 0126 hash ^= hash << 11; 0127 hash += hash >> 17; 0128 } 0129 0130 // Force "avalanching" of final 127 bits. 0131 hash ^= hash << 3; 0132 hash += hash >> 5; 0133 hash ^= hash << 2; 0134 hash += hash >> 15; 0135 hash ^= hash << 10; 0136 0137 // This avoids ever returning a hash code of 0, since that is used to 0138 // signal "hash not computed yet", using a value that is likely to be 0139 // effectively the same as 0 when the low bits are masked. 0140 hash |= !hash << 31; 0141 0142 return hash; 0143 } 0144 0145 static unsigned hash(DOMStringImpl *str) 0146 { 0147 return hash(str->unicode(), str->length()); 0148 } 0149 0150 static unsigned hash(const char *str, unsigned length) 0151 { 0152 // This hash is designed to work on 16-bit chunks at a time. But since the normal case 0153 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they 0154 // were 16-bit chunks, which will give matching results. 0155 0156 unsigned l = length; 0157 const char *s = str; 0158 uint32_t hash = PHI; 0159 uint32_t tmp; 0160 0161 int rem = l & 1; 0162 l >>= 1; 0163 0164 // Main loop 0165 for (; l > 0; l--) { 0166 hash += QChar::toCaseFolded((unsigned int)s[0]); 0167 tmp = (QChar::toCaseFolded((unsigned int)s[1]) << 11) ^ hash; 0168 hash = (hash << 16) ^ tmp; 0169 s += 2; 0170 hash += hash >> 11; 0171 } 0172 0173 // Handle end case 0174 if (rem) { 0175 hash += QChar::toCaseFolded((unsigned int)s[0]); 0176 hash ^= hash << 11; 0177 hash += hash >> 17; 0178 } 0179 0180 // Force "avalanching" of final 127 bits 0181 hash ^= hash << 3; 0182 hash += hash >> 5; 0183 hash ^= hash << 2; 0184 hash += hash >> 15; 0185 hash ^= hash << 10; 0186 0187 // this avoids ever returning a hash code of 0, since that is used to 0188 // signal "hash not computed yet", using a value that is likely to be 0189 // effectively the same as 0 when the low bits are masked 0190 hash |= !hash << 31; 0191 0192 return hash; 0193 } 0194 0195 static bool equal(DOMStringImpl *a, DOMStringImpl *b) 0196 { 0197 if (a == b) { 0198 return true; 0199 } 0200 if (!a || !b) { 0201 return false; 0202 } 0203 unsigned length = a->length(); 0204 if (length != b->length()) { 0205 return false; 0206 } 0207 for (unsigned i = 0; i < length; ++i) 0208 if (a->unicode()[i].toCaseFolded() != b->unicode()[i].toCaseFolded()) { 0209 return false; 0210 } 0211 return true; 0212 } 0213 0214 static unsigned hash(const RefPtr<DOMStringImpl> &key) 0215 { 0216 return hash(key.get()); 0217 } 0218 0219 static bool equal(const RefPtr<DOMStringImpl> &a, const RefPtr<DOMStringImpl> &b) 0220 { 0221 return equal(a.get(), b.get()); 0222 } 0223 0224 static unsigned hash(const DOMString &key) 0225 { 0226 return hash(key.implementation()); 0227 } 0228 static bool equal(const DOMString &a, const DOMString &b) 0229 { 0230 return equal(a.implementation(), b.implementation()); 0231 } 0232 0233 static const bool safeToCompareToEmptyOrDeleted = false; 0234 }; 0235 0236 // This hash can be used in cases where the key is a hash of a string, but we don't 0237 // want to store the string. It's not really specific to string hashing, but all our 0238 // current uses of it are for strings. 0239 struct AlreadyHashed : IntHash<unsigned> { 0240 static unsigned hash(unsigned key) 0241 { 0242 return key; 0243 } 0244 0245 // To use a hash value as a key for a hash table, we need to eliminate the 0246 // "deleted" value, which is negative one. That could be done by changing 0247 // the string hash function to never generate negative one, but this works 0248 // and is still relatively efficient. 0249 static unsigned avoidDeletedValue(unsigned hash) 0250 { 0251 ASSERT(hash); 0252 unsigned newHash = hash | (!(hash + 1) << 31); 0253 ASSERT(newHash); 0254 ASSERT(newHash != 0xFFFFFFFF); 0255 return newHash; 0256 } 0257 }; 0258 0259 } 0260 0261 namespace WTF 0262 { 0263 0264 /*template<> struct HashTraits<DOM::DOMString> : GenericHashTraits<DOM::DOMString> { 0265 static const bool emptyValueIsZero = true; 0266 static void constructDeletedValue(DOM::DOMString& slot) { new (&slot) DOM::DOMString(HashTableDeletedValue); } 0267 static bool isDeletedValue(const DOM::DOMString& slot) { return slot.isHashTableDeletedValue(); } 0268 };*/ 0269 0270 } 0271 0272 #endif 0273