File indexing completed on 2024-03-24 16:22:29
0001 /** 0002 This file belong to the KMPlayer project, a movie player plugin for Konqueror 0003 Copyright (C) 2007 Koos Vriezen <koos.vriezen@gmail.com> 0004 0005 This library is free software; you can redistribute it and/or 0006 modify it under the terms of the GNU Lesser General Public 0007 License as published by the Free Software Foundation; either 0008 version 2 of the License, or (at your option) any later version. 0009 0010 This library is distributed in the hope that it will be useful, 0011 but WITHOUT ANY WARRANTY; without even the implied warranty of 0012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0013 Lesser General Public License for more details. 0014 0015 You should have received a copy of the GNU Lesser General Public 0016 License along with this library; if not, write to the Free Software 0017 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 0018 **/ 0019 0020 #ifdef TEST_TRIE 0021 # define KMPLAYER_NO_EXPORT 0022 # define KMPLAYER_EXPORT 0023 # define KDE_NO_EXPORT 0024 # define KDE_NO_CDTOR_EXPORT 0025 #else 0026 # include <config-kmplayer.h> 0027 # include "kmplayer_def.h" 0028 #endif 0029 #include <assert.h> 0030 #include <stdio.h> 0031 #include <stdlib.h> 0032 #include <vector> 0033 0034 0035 #include "triestring.h" 0036 0037 namespace KMPlayer { 0038 0039 struct TrieNode 0040 { 0041 enum { MaxPacked = sizeof (void*) }; 0042 0043 TrieNode() : ref_count(0), length(0), parent(NULL), buffer(NULL) 0044 {} 0045 TrieNode(TrieNode* p, const char* s, size_t len) 0046 : ref_count(0), length(0) 0047 { 0048 update(p, s, len); 0049 } 0050 ~TrieNode() 0051 { 0052 if (length > MaxPacked) 0053 free(buffer); 0054 } 0055 void update(TrieNode* p, const char* s, size_t len) 0056 { 0057 char* old = length > MaxPacked ? buffer : NULL; 0058 parent = p; 0059 length = len; 0060 if (len <= MaxPacked) { 0061 if ((unsigned)::abs(s - packed) > len) 0062 memcpy(packed, s, len); 0063 else 0064 memmove(packed, s, len); 0065 } else { 0066 buffer = (char*)malloc(len); 0067 memcpy(buffer, s, len); 0068 } 0069 if (old) 0070 free(old); 0071 } 0072 0073 int ref_count; 0074 unsigned length; 0075 TrieNode* parent; 0076 std::vector<TrieNode*> children; 0077 0078 union { 0079 char packed[MaxPacked]; 0080 char* buffer; 0081 }; 0082 }; 0083 0084 } 0085 0086 using namespace KMPlayer; 0087 0088 static char* trieCharPtr(TrieNode* n) 0089 { 0090 return n->length <= TrieNode::MaxPacked ? n->packed : n->buffer; 0091 } 0092 0093 static char* trieRetrieveString (TrieNode* node, int& len) 0094 { 0095 char *buf; 0096 if (node->parent) { 0097 len += node->length; 0098 int p = len; 0099 buf = trieRetrieveString(node->parent, len); 0100 memcpy(buf+len-p, trieCharPtr(node), node->length); 0101 } else { 0102 buf = (char*)malloc(len + 1); 0103 buf[len] = 0; 0104 } 0105 return buf; 0106 } 0107 0108 static TrieNode* trieRoot() 0109 { 0110 static TrieNode* trie_root; 0111 if (!trie_root) 0112 trie_root = new TrieNode(); 0113 return trie_root; 0114 } 0115 0116 static void dump(TrieNode* n, int indent) 0117 { 0118 for (int i =0; i < indent; ++i) 0119 fprintf(stderr, " "); 0120 fprintf(stderr, "'"); 0121 for (unsigned i = 0; i < n->length; ++i) 0122 fprintf(stderr, "%c", trieCharPtr(n)[i]); 0123 fprintf(stderr, "'\n"); 0124 for (unsigned i = 0; i < n->children.size(); ++i) 0125 dump(n->children[i], indent+2); 0126 } 0127 0128 static int trieSameGenerationCompare(TrieNode* n1, TrieNode* n2) 0129 { 0130 if (n1->parent == n2->parent) 0131 return memcmp(trieCharPtr(n1), trieCharPtr(n2), std::min(n1->length, n2->length)); 0132 return trieSameGenerationCompare(n1->parent, n2->parent); 0133 } 0134 0135 static int trieCompare(TrieNode* n1, TrieNode* n2) 0136 { 0137 if (n1 == n2) 0138 return 0; 0139 int depth1 = 0, depth2 = 0; 0140 for (TrieNode* n = n1; n; n = n->parent) 0141 depth1++; 0142 if (!depth1) 0143 return n2 ? -1 : 0; 0144 for (TrieNode* n = n2; n; n = n->parent) 0145 depth2++; 0146 if (!depth2) 0147 return 1; 0148 if (depth1 != depth2) { 0149 int dcmp = depth1 > depth2 ? 1 : -1; 0150 for (; depth1 > depth2; --depth1) 0151 n1 = n1->parent; 0152 for (; depth2 > depth1; --depth2) 0153 n2 = n2->parent; 0154 if (n1 == n2) 0155 return dcmp; 0156 } 0157 return trieSameGenerationCompare(n1, n2); 0158 } 0159 0160 static int trieStringCompare(TrieNode* node, const char* s, int& pos, int len) 0161 { 0162 int cmp = 0; 0163 if (node->parent) 0164 cmp = trieStringCompare (node->parent, s, pos, len); 0165 if (!cmp) { 0166 if (pos > len) 0167 return 1; 0168 if (pos == len) 0169 return node->length ? 1 : 0; 0170 if (len - pos < node->length) { 0171 cmp = memcmp(trieCharPtr(node), s + pos, len - pos); 0172 if (!cmp) 0173 cmp = 1; 0174 } else { 0175 cmp = memcmp(trieCharPtr(node), s + pos, node->length); 0176 } 0177 pos += node->length; 0178 } 0179 return cmp; 0180 } 0181 0182 static int trieStringCompare(TrieNode* node, const char* s) 0183 { 0184 if (!node) 0185 return !!s; 0186 if (!s) 0187 return 1; 0188 int pos = 0; 0189 int len = strlen(s); 0190 int cmp = trieStringCompare(node, s, pos, len); 0191 #ifdef TEST_TRIE 0192 fprintf(stderr, "== %s -> (%d %d) %d\n", s, pos, len, cmp); 0193 #endif 0194 if (cmp) 0195 return cmp; 0196 if (pos == len) 0197 return 0; 0198 return pos < len ? -1 : 1; 0199 } 0200 0201 //first index in range [first,last) which does not compare less than c 0202 static int trieLowerBound(const TrieNode* n, int begin, int end, char c) 0203 { 0204 if (begin == end) 0205 return end; 0206 if (begin == end - 1) 0207 return trieCharPtr(n->children[begin])[0] >= c ? begin : end; 0208 int i = (begin + end)/2; 0209 char c1 = trieCharPtr(n->children[i])[0]; 0210 if (c == c1) 0211 return i; 0212 if (c < c1) 0213 return trieLowerBound(n, begin, i, c); 0214 return trieLowerBound(n, i + 1, end, c); 0215 } 0216 0217 static TrieNode* trieInsert(TrieNode* parent, const char* s, size_t len) 0218 { 0219 TrieNode* node; 0220 0221 if (!*s) 0222 return parent; 0223 0224 unsigned idx = trieLowerBound(parent, 0, parent->children.size(), s[0]); 0225 if (idx < parent->children.size()) { 0226 node = parent->children[idx]; 0227 char* s2 = trieCharPtr(node); 0228 if (s[0] == s2[0]) { 0229 if (node->length == len 0230 && !memcmp((void*)s, trieCharPtr(node), len)) { 0231 return node; 0232 } 0233 for (unsigned i = 1; i < node->length; ++i) { 0234 if (i == len) { 0235 TrieNode* rep = new TrieNode(parent, s, i); 0236 parent->children[idx] = rep; 0237 node->update(rep, s2 + i, node->length - i); 0238 rep->children.push_back(node); 0239 return rep; 0240 } else if (s[i] != s2[i]) { 0241 TrieNode* rep = new TrieNode(parent, s2, i); 0242 TrieNode* child = new TrieNode(rep, s + i, len - i); 0243 bool cmp = s2[i] < s[i]; 0244 node->update(rep, s2 + i, node->length - i); 0245 if (cmp) { 0246 rep->children.push_back(node); 0247 rep->children.push_back(child); 0248 } else { 0249 rep->children.push_back(child); 0250 rep->children.push_back(node); 0251 } 0252 parent->children[idx] = rep; 0253 return child; 0254 } 0255 } 0256 return trieInsert(node, s + node->length, len - node->length); 0257 } else if (s[0] < s2[0]) { 0258 node = new TrieNode(parent, s, len); 0259 parent->children.insert(parent->children.begin()+idx, node); 0260 return node; 0261 } 0262 } 0263 node = new TrieNode(parent, s, len); 0264 parent->children.push_back(node); 0265 return node; 0266 } 0267 0268 static void trieRemove(TrieNode* node) 0269 { 0270 if (node->children.size() > 1) 0271 return; 0272 TrieNode* parent = node->parent; 0273 if (!parent) 0274 return; 0275 char* s = trieCharPtr(node); 0276 assert(*s); 0277 unsigned idx = trieLowerBound(parent, 0, parent->children.size(), s[0]); 0278 assert(parent->children[idx] == node); 0279 if (node->children.size()) { 0280 TrieNode* child = node->children[0]; 0281 char* s1 = (char*)malloc(child->length + node->length); 0282 memcpy(s1, s, node->length); 0283 memcpy(s1 + node->length, trieCharPtr(child), child->length); 0284 child->update(parent, s1, child->length + node->length); 0285 free(s1); 0286 parent->children[idx] = child; 0287 delete node; 0288 } else { 0289 parent->children.erase(parent->children.begin() + idx); 0290 delete node; 0291 if (!parent->ref_count) 0292 trieRemove(parent); 0293 } 0294 } 0295 0296 static int trieStringStarts(TrieNode* node, const char* s, int& pos) 0297 { 0298 int cmp = -1; // -1 still matches, 0 no, 1 yes 0299 if (node->parent) 0300 cmp = trieStringStarts(node->parent, s, pos); 0301 if (cmp == -1) { 0302 char* s1 = trieCharPtr(node); 0303 for (unsigned i = 0; i < node->length; ++i) 0304 if (s1[i] != s[pos + i]) 0305 return !s[pos + i] ? 1 : 0; 0306 pos += node->length; 0307 } 0308 return cmp; 0309 } 0310 0311 TrieString::TrieString (const QString& s) : node(NULL) 0312 { 0313 if (!s.isNull()) { 0314 const QByteArray ba = s.toUtf8(); 0315 node = trieInsert(trieRoot(), ba.constData(), ba.length()); 0316 ++node->ref_count; 0317 } 0318 } 0319 0320 TrieString::TrieString(const char* s) 0321 : node(!s ? NULL : trieInsert(trieRoot(), s, strlen(s))) 0322 { 0323 if (node) 0324 ++node->ref_count; 0325 } 0326 0327 TrieString::TrieString(const char* s, int len) 0328 : node(!s ? NULL : trieInsert(trieRoot(), s, len)) 0329 { 0330 if (node) 0331 ++node->ref_count; 0332 } 0333 0334 TrieString::TrieString(const TrieString& s) : node(s.node) 0335 { 0336 if (node) 0337 ++node->ref_count; 0338 } 0339 0340 TrieString::~TrieString() 0341 { 0342 if (node && !--node->ref_count) { 0343 #ifdef TEST_TRIE 0344 fprintf(stderr, "delete %s\n", qPrintable(toString())); 0345 #endif 0346 trieRemove(node); 0347 } 0348 } 0349 0350 TrieString& TrieString::operator=(const char* s) 0351 { 0352 if (node && !--node->ref_count) { 0353 #ifdef TEST_TRIE 0354 fprintf(stderr, "= delete %s\n", qPrintable(toString())); 0355 #endif 0356 trieRemove(node); 0357 } 0358 node = !s ? NULL : trieInsert(trieRoot(), s, strlen(s)); 0359 if (node) 0360 ++node->ref_count; 0361 return *this; 0362 } 0363 0364 TrieString& TrieString::operator=(const TrieString& s) 0365 { 0366 if (s.node != node) { 0367 if (s.node) 0368 ++s.node->ref_count; 0369 if (node && !--node->ref_count) { 0370 #ifdef TEST_TRIE 0371 fprintf(stderr, "= delete %s\n", qPrintable(toString())); 0372 #endif 0373 trieRemove(node); 0374 } 0375 node = s.node; 0376 } 0377 return *this; 0378 } 0379 0380 bool TrieString::operator<(const TrieString& s) const 0381 { 0382 return trieCompare(node, s.node) < 0; 0383 } 0384 0385 bool KMPlayer::operator==(const TrieString& t, const char* s) 0386 { 0387 return trieStringCompare(t.node, s) == 0; 0388 } 0389 0390 bool TrieString::startsWith(const TrieString& s) const 0391 { 0392 for (TrieNode* n = node; n; n = n->parent) 0393 if (n == s.node) 0394 return true; 0395 return s.node ? false : true; 0396 } 0397 0398 bool TrieString::startsWith(const char* str) const 0399 { 0400 if (!node) 0401 return !str ? true : false; 0402 if (!str) 0403 return true; 0404 int pos = 0; 0405 return trieStringStarts(node, str, pos) != 0; 0406 } 0407 0408 QString TrieString::toString() const 0409 { 0410 if (!node) 0411 return QString(); 0412 int len = 0; 0413 char* buf = trieRetrieveString(node, len); 0414 QString s = QString::fromUtf8(buf); 0415 free(buf); 0416 return s; 0417 } 0418 0419 void TrieString::clear() 0420 { 0421 if (node && !--node->ref_count) { 0422 #ifdef TEST_TRIE 0423 fprintf(stderr, "clear delete %s\n", qPrintable(toString())); 0424 #endif 0425 trieRemove(node); 0426 } 0427 node = NULL; 0428 } 0429 0430 0431 TrieString Ids::attr_id; 0432 TrieString Ids::attr_name; 0433 TrieString Ids::attr_src; 0434 TrieString Ids::attr_url; 0435 TrieString Ids::attr_href; 0436 TrieString Ids::attr_width; 0437 TrieString Ids::attr_height; 0438 TrieString Ids::attr_top; 0439 TrieString Ids::attr_left; 0440 TrieString Ids::attr_bottom; 0441 TrieString Ids::attr_right; 0442 TrieString Ids::attr_title; 0443 TrieString Ids::attr_begin; 0444 TrieString Ids::attr_dur; 0445 TrieString Ids::attr_end; 0446 TrieString Ids::attr_region; 0447 TrieString Ids::attr_target; 0448 TrieString Ids::attr_type; 0449 TrieString Ids::attr_value; 0450 TrieString Ids::attr_fill; 0451 TrieString Ids::attr_fit; 0452 0453 void Ids::init() { 0454 attr_width = "width"; 0455 attr_value = "value"; 0456 attr_url = "url"; 0457 attr_type = "type"; 0458 attr_top = "top"; 0459 attr_title = "title"; 0460 attr_target = "target"; 0461 attr_src = "src"; 0462 attr_right = "right"; 0463 attr_region = "region"; 0464 attr_name = "name"; 0465 attr_left = "left"; 0466 attr_id = "id"; 0467 attr_href = "href"; 0468 attr_height = "height"; 0469 attr_fit = "fit"; 0470 attr_fill = "fill"; 0471 attr_end = "end"; 0472 attr_dur = "dur"; 0473 attr_bottom = "bottom"; 0474 attr_begin = "begin"; 0475 } 0476 0477 void Ids::reset() { 0478 attr_id.clear (); 0479 attr_name.clear (); 0480 attr_src.clear (); 0481 attr_url.clear (); 0482 attr_href.clear (); 0483 attr_width.clear (); 0484 attr_height.clear (); 0485 attr_top.clear (); 0486 attr_left.clear (); 0487 attr_bottom.clear (); 0488 attr_right.clear (); 0489 attr_title.clear (); 0490 attr_begin.clear (); 0491 attr_dur.clear (); 0492 attr_end.clear (); 0493 attr_region.clear (); 0494 attr_target.clear (); 0495 attr_type.clear (); 0496 attr_value.clear (); 0497 attr_fill.clear (); 0498 attr_fit.clear (); 0499 if (trieRoot()->children.size()) { 0500 qWarning ("Trie not empty"); 0501 dumpTrie (); 0502 //} else { 0503 //delete root_trie; 0504 //root_trie = 0; 0505 } 0506 } 0507 0508 void KMPlayer::dumpTrie () { 0509 dump(trieRoot(), 0); 0510 } 0511 0512 #ifdef TEST_TRIE 0513 // g++ triestring.cpp -o triestring -I$QTDIR/include -L$QTDIR/lib -lqt-mt -g -DTEST_TRIE 0514 0515 int main (int, char **) { 0516 Ids::init(); 0517 { 0518 TrieString s1; 0519 TrieString s1_1(QString ("region")); 0520 s1 = s1_1; 0521 TrieString s2 (QString ("regionName")); 0522 TrieString s3 (QString ("regPoint")); 0523 TrieString s4 (QString ("regAlign")); 0524 TrieString s6 (QString ("freeze")); 0525 TrieString s7 (QString ("fit")); 0526 { 0527 TrieString s7_1 (QString ("fit")); 0528 TrieString s5 (QString ("fill")); 0529 dump (root_trie, 0); 0530 } 0531 dump (root_trie, 0); 0532 TrieString s5 (QString ("fill")); 0533 TrieString s8 (QString ("fontPtSize")); 0534 TrieString s9 (QString ("fontSize")); 0535 TrieString s10 (QString ("fontFace")); 0536 TrieString s11 (QString ("fontColor")); 0537 TrieString s12 (QString ("hAlign")); 0538 TrieString s13 (QString ("region")); 0539 TrieString s14 (QString ("ref")); 0540 TrieString s15 (QString ("head")); 0541 dump (root_trie, 0); 0542 QString qs1 = s1.toString (); 0543 QString qs2 = s2.toString (); 0544 printf ("%s\n%s\n", qs1.toAscii(), qs2.toAscii()); 0545 printf("equal %s %s %d\n", qs2.toAscii(), "regionName", s2 == "regionName"); 0546 printf("equal %s %s %d\n", qs2.toAscii(), "zegionName", s2 == "zegionName"); 0547 printf("equal %s %s %d\n", qs2.toAscii(), "reqionName", s2 == "reqionName"); 0548 printf("equal %s %s %d\n", qs2.toAscii(), "regiinName", s2 == "regiinName"); 0549 printf("equal %s %s %d\n", qs2.toAscii(), "regionNeme", s2 == "regionNeme"); 0550 printf("%s < %s %d\n", qs2.toAscii(), "regionName", s2 < TrieString("regionName")); 0551 printf("%s < %s %d\n", qs2.toAscii(), "zegion", s2 < TrieString("zegion")); 0552 printf("%s < %s %d\n", qs2.toAscii(), "req", s2 < TrieString("req")); 0553 printf("%s < %s %d\n", qs2.toAscii(), "regiinName", s2 < TrieString("regiinName")); 0554 printf("%s < %s %d\n", qs2.toAscii(), "regionNeme", s2 < TrieString("regionNeme")); 0555 printf("%s startsWith %s %d\n", s1.toString().toAscii(), "region", s1.startsWith ("region")); 0556 printf("%s startsWith %s %d\n", qs2.toAscii(), "region", s2.startsWith ("region")); 0557 printf("%s startsWith %s %d\n", qs2.toAscii(), "regi", s2.startsWith ("regi")); 0558 printf("%s startsWith %s %d\n", qs2.toAscii(), "regian", s2.startsWith ("regian")); 0559 printf("%s startsWith %s %d\n", qs2.toAscii(), "regio", s2.startsWith ("regio")); 0560 printf("%s startsWith %s %d\n", qs2.toAscii(), "zegio", s2.startsWith ("zegio")); 0561 printf("%s startsWith %s %d\n", qs2.toAscii(), "r", s2.startsWith ("r")); 0562 printf("%s startsWith %s %d\n", qs2.toAscii(), "q", s2.startsWith ("q")); 0563 TrieString fnt ("font"); 0564 printf("%s startsWith %s %d\n", s8.toString().toAscii(), fnt.toString().toAscii(), s8.startsWith(fnt)); 0565 printf("%s startsWith %s %d\n", s8.toString().toAscii(), s14.toString().toAscii(), s8.startsWith(s14)); 0566 } 0567 dump (root_trie, 0); 0568 Ids::reset(); 0569 return 0; 0570 } 0571 #endif