File indexing completed on 2024-04-14 04:49:09
0001 /* 0002 This file belong to the KMPlayer project, a movie player plugin for Konqueror 0003 SPDX-FileCopyrightText: 2007 Koos Vriezen <koos.vriezen@gmail.com> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 #ifdef TEST_TRIE 0009 # define KMPLAYERCOMMON_EXPORT 0010 #else 0011 # include <config-kmplayer.h> 0012 #endif 0013 #include <cassert> 0014 #include <cstdio> 0015 #include <cstdlib> 0016 #include <vector> 0017 0018 #include "kmplayercommon_log.h" 0019 #include "triestring.h" 0020 0021 namespace KMPlayer { 0022 0023 struct TrieNode 0024 { 0025 enum { MaxPacked = sizeof (void*) }; 0026 0027 TrieNode() : ref_count(0), length(0), parent(nullptr), buffer(nullptr) 0028 {} 0029 TrieNode(TrieNode* p, const char* s, size_t len) 0030 : ref_count(0), length(0) 0031 { 0032 update(p, s, len); 0033 } 0034 ~TrieNode() 0035 { 0036 if (length > MaxPacked) 0037 free(buffer); 0038 } 0039 void update(TrieNode* p, const char* s, size_t len) 0040 { 0041 char* old = length > MaxPacked ? buffer : nullptr; 0042 parent = p; 0043 length = len; 0044 if (len <= MaxPacked) { 0045 if ((unsigned)::abs(s - packed) > len) 0046 memcpy(packed, s, len); 0047 else 0048 memmove(packed, s, len); 0049 } else { 0050 buffer = (char*)malloc(len); 0051 memcpy(buffer, s, len); 0052 } 0053 if (old) 0054 free(old); 0055 } 0056 0057 int ref_count; 0058 unsigned length; 0059 TrieNode* parent; 0060 std::vector<TrieNode*> children; 0061 0062 union { 0063 char packed[MaxPacked]; 0064 char* buffer; 0065 }; 0066 }; 0067 0068 } 0069 0070 using namespace KMPlayer; 0071 0072 static char* trieCharPtr(TrieNode* n) 0073 { 0074 return n->length <= TrieNode::MaxPacked ? n->packed : n->buffer; 0075 } 0076 0077 static char* trieRetrieveString (TrieNode* node, int& len) 0078 { 0079 char *buf; 0080 if (node->parent) { 0081 len += node->length; 0082 int p = len; 0083 buf = trieRetrieveString(node->parent, len); 0084 memcpy(buf+len-p, trieCharPtr(node), node->length); 0085 } else { 0086 buf = (char*)malloc(len + 1); 0087 buf[len] = 0; 0088 } 0089 return buf; 0090 } 0091 0092 static TrieNode* trieRoot() 0093 { 0094 static TrieNode* trie_root; 0095 if (!trie_root) 0096 trie_root = new TrieNode(); 0097 return trie_root; 0098 } 0099 0100 static void dump(TrieNode* n, int indent) 0101 { 0102 for (int i =0; i < indent; ++i) 0103 fprintf(stderr, " "); 0104 fprintf(stderr, "'"); 0105 for (unsigned i = 0; i < n->length; ++i) 0106 fprintf(stderr, "%c", trieCharPtr(n)[i]); 0107 fprintf(stderr, "'\n"); 0108 for (unsigned i = 0; i < n->children.size(); ++i) 0109 dump(n->children[i], indent+2); 0110 } 0111 0112 static int trieSameGenerationCompare(TrieNode* n1, TrieNode* n2) 0113 { 0114 if (n1->parent == n2->parent) 0115 return memcmp(trieCharPtr(n1), trieCharPtr(n2), std::min(n1->length, n2->length)); 0116 return trieSameGenerationCompare(n1->parent, n2->parent); 0117 } 0118 0119 static int trieCompare(TrieNode* n1, TrieNode* n2) 0120 { 0121 if (n1 == n2) 0122 return 0; 0123 int depth1 = 0, depth2 = 0; 0124 for (TrieNode* n = n1; n; n = n->parent) 0125 depth1++; 0126 if (!depth1) 0127 return n2 ? -1 : 0; 0128 for (TrieNode* n = n2; n; n = n->parent) 0129 depth2++; 0130 if (!depth2) 0131 return 1; 0132 if (depth1 != depth2) { 0133 int dcmp = depth1 > depth2 ? 1 : -1; 0134 for (; depth1 > depth2; --depth1) 0135 n1 = n1->parent; 0136 for (; depth2 > depth1; --depth2) 0137 n2 = n2->parent; 0138 if (n1 == n2) 0139 return dcmp; 0140 } 0141 return trieSameGenerationCompare(n1, n2); 0142 } 0143 0144 static int trieStringCompare(TrieNode* node, const char* s, int& pos, int len) 0145 { 0146 int cmp = 0; 0147 if (node->parent) 0148 cmp = trieStringCompare (node->parent, s, pos, len); 0149 if (!cmp) { 0150 if (pos > len) 0151 return 1; 0152 if (pos == len) 0153 return node->length ? 1 : 0; 0154 if (len - pos < static_cast<int>(node->length)) { 0155 cmp = memcmp(trieCharPtr(node), s + pos, len - pos); 0156 if (!cmp) 0157 cmp = 1; 0158 } else { 0159 cmp = memcmp(trieCharPtr(node), s + pos, node->length); 0160 } 0161 pos += node->length; 0162 } 0163 return cmp; 0164 } 0165 0166 static int trieStringCompare(TrieNode* node, const char* s) 0167 { 0168 if (!node) 0169 return !!s; 0170 if (!s) 0171 return 1; 0172 int pos = 0; 0173 int len = strlen(s); 0174 int cmp = trieStringCompare(node, s, pos, len); 0175 #ifdef TEST_TRIE 0176 fprintf(stderr, "== %s -> (%d %d) %d\n", s, pos, len, cmp); 0177 #endif 0178 if (cmp) 0179 return cmp; 0180 if (pos == len) 0181 return 0; 0182 return pos < len ? -1 : 1; 0183 } 0184 0185 //first index in range [first,last) which does not compare less than c 0186 static int trieLowerBound(const TrieNode* n, int begin, int end, char c) 0187 { 0188 if (begin == end) 0189 return end; 0190 if (begin == end - 1) 0191 return trieCharPtr(n->children[begin])[0] >= c ? begin : end; 0192 int i = (begin + end)/2; 0193 char c1 = trieCharPtr(n->children[i])[0]; 0194 if (c == c1) 0195 return i; 0196 if (c < c1) 0197 return trieLowerBound(n, begin, i, c); 0198 return trieLowerBound(n, i + 1, end, c); 0199 } 0200 0201 static TrieNode* trieInsert(TrieNode* parent, const char* s, size_t len) 0202 { 0203 TrieNode* node; 0204 0205 if (!*s) 0206 return parent; 0207 0208 unsigned idx = trieLowerBound(parent, 0, parent->children.size(), s[0]); 0209 if (idx < parent->children.size()) { 0210 node = parent->children[idx]; 0211 char* s2 = trieCharPtr(node); 0212 if (s[0] == s2[0]) { 0213 if (node->length == len 0214 && !memcmp((void*)s, trieCharPtr(node), len)) { 0215 return node; 0216 } 0217 for (unsigned i = 1; i < node->length; ++i) { 0218 if (i == len) { 0219 TrieNode* rep = new TrieNode(parent, s, i); 0220 parent->children[idx] = rep; 0221 node->update(rep, s2 + i, node->length - i); 0222 rep->children.push_back(node); 0223 return rep; 0224 } else if (s[i] != s2[i]) { 0225 TrieNode* rep = new TrieNode(parent, s2, i); 0226 TrieNode* child = new TrieNode(rep, s + i, len - i); 0227 bool cmp = s2[i] < s[i]; 0228 node->update(rep, s2 + i, node->length - i); 0229 if (cmp) { 0230 rep->children.push_back(node); 0231 rep->children.push_back(child); 0232 } else { 0233 rep->children.push_back(child); 0234 rep->children.push_back(node); 0235 } 0236 parent->children[idx] = rep; 0237 return child; 0238 } 0239 } 0240 return trieInsert(node, s + node->length, len - node->length); 0241 } else if (s[0] < s2[0]) { 0242 node = new TrieNode(parent, s, len); 0243 parent->children.insert(parent->children.begin()+idx, node); 0244 return node; 0245 } 0246 } 0247 node = new TrieNode(parent, s, len); 0248 parent->children.push_back(node); 0249 return node; 0250 } 0251 0252 static void trieRemove(TrieNode* node) 0253 { 0254 if (node->children.size() > 1) 0255 return; 0256 TrieNode* parent = node->parent; 0257 if (!parent) 0258 return; 0259 char* s = trieCharPtr(node); 0260 assert(*s); 0261 unsigned idx = trieLowerBound(parent, 0, parent->children.size(), s[0]); 0262 assert(parent->children[idx] == node); 0263 if (node->children.size()) { 0264 TrieNode* child = node->children[0]; 0265 char* s1 = (char*)malloc(child->length + node->length); 0266 memcpy(s1, s, node->length); 0267 memcpy(s1 + node->length, trieCharPtr(child), child->length); 0268 child->update(parent, s1, child->length + node->length); 0269 free(s1); 0270 parent->children[idx] = child; 0271 delete node; 0272 } else { 0273 parent->children.erase(parent->children.begin() + idx); 0274 delete node; 0275 if (!parent->ref_count) 0276 trieRemove(parent); 0277 } 0278 } 0279 0280 static int trieStringStarts(TrieNode* node, const char* s, int& pos) 0281 { 0282 int cmp = -1; // -1 still matches, 0 no, 1 yes 0283 if (node->parent) 0284 cmp = trieStringStarts(node->parent, s, pos); 0285 if (cmp == -1) { 0286 char* s1 = trieCharPtr(node); 0287 for (unsigned i = 0; i < node->length; ++i) 0288 if (s1[i] != s[pos + i]) 0289 return !s[pos + i] ? 1 : 0; 0290 pos += node->length; 0291 } 0292 return cmp; 0293 } 0294 0295 TrieString::TrieString (const QString& s) : node(nullptr) 0296 { 0297 if (!s.isNull()) { 0298 const QByteArray ba = s.toUtf8(); 0299 node = trieInsert(trieRoot(), ba.constData(), ba.length()); 0300 ++node->ref_count; 0301 } 0302 } 0303 0304 TrieString::TrieString(const char* s) 0305 : node(!s ? nullptr : trieInsert(trieRoot(), s, strlen(s))) 0306 { 0307 if (node) 0308 ++node->ref_count; 0309 } 0310 0311 TrieString::TrieString(const char* s, int len) 0312 : node(!s ? nullptr : trieInsert(trieRoot(), s, len)) 0313 { 0314 if (node) 0315 ++node->ref_count; 0316 } 0317 0318 TrieString::TrieString(const TrieString& s) : node(s.node) 0319 { 0320 if (node) 0321 ++node->ref_count; 0322 } 0323 0324 TrieString::~TrieString() 0325 { 0326 if (node && !--node->ref_count) { 0327 #ifdef TEST_TRIE 0328 fprintf(stderr, "delete %s\n", qPrintable(toString())); 0329 #endif 0330 trieRemove(node); 0331 } 0332 } 0333 0334 TrieString& TrieString::operator=(const char* s) 0335 { 0336 if (node && !--node->ref_count) { 0337 #ifdef TEST_TRIE 0338 fprintf(stderr, "= delete %s\n", qPrintable(toString())); 0339 #endif 0340 trieRemove(node); 0341 } 0342 node = !s ? nullptr : trieInsert(trieRoot(), s, strlen(s)); 0343 if (node) 0344 ++node->ref_count; 0345 return *this; 0346 } 0347 0348 TrieString& TrieString::operator=(const TrieString& s) 0349 { 0350 if (s.node != node) { 0351 if (s.node) 0352 ++s.node->ref_count; 0353 if (node && !--node->ref_count) { 0354 #ifdef TEST_TRIE 0355 fprintf(stderr, "= delete %s\n", qPrintable(toString())); 0356 #endif 0357 trieRemove(node); 0358 } 0359 node = s.node; 0360 } 0361 return *this; 0362 } 0363 0364 bool TrieString::operator<(const TrieString& s) const 0365 { 0366 return trieCompare(node, s.node) < 0; 0367 } 0368 0369 bool KMPlayer::operator==(const TrieString& t, const char* s) 0370 { 0371 return trieStringCompare(t.node, s) == 0; 0372 } 0373 0374 bool TrieString::startsWith(const TrieString& s) const 0375 { 0376 for (TrieNode* n = node; n; n = n->parent) 0377 if (n == s.node) 0378 return true; 0379 return s.node ? false : true; 0380 } 0381 0382 bool TrieString::startsWith(const char* str) const 0383 { 0384 if (!node) 0385 return !str ? true : false; 0386 if (!str) 0387 return true; 0388 int pos = 0; 0389 return trieStringStarts(node, str, pos) != 0; 0390 } 0391 0392 QString TrieString::toString() const 0393 { 0394 if (!node) 0395 return QString(); 0396 int len = 0; 0397 char* buf = trieRetrieveString(node, len); 0398 QString s = QString::fromUtf8(buf); 0399 free(buf); 0400 return s; 0401 } 0402 0403 void TrieString::clear() 0404 { 0405 if (node && !--node->ref_count) { 0406 #ifdef TEST_TRIE 0407 fprintf(stderr, "clear delete %s\n", qPrintable(toString())); 0408 #endif 0409 trieRemove(node); 0410 } 0411 node = nullptr; 0412 } 0413 0414 0415 TrieString Ids::attr_id; 0416 TrieString Ids::attr_name; 0417 TrieString Ids::attr_src; 0418 TrieString Ids::attr_url; 0419 TrieString Ids::attr_href; 0420 TrieString Ids::attr_width; 0421 TrieString Ids::attr_height; 0422 TrieString Ids::attr_top; 0423 TrieString Ids::attr_left; 0424 TrieString Ids::attr_bottom; 0425 TrieString Ids::attr_right; 0426 TrieString Ids::attr_title; 0427 TrieString Ids::attr_begin; 0428 TrieString Ids::attr_dur; 0429 TrieString Ids::attr_end; 0430 TrieString Ids::attr_region; 0431 TrieString Ids::attr_target; 0432 TrieString Ids::attr_type; 0433 TrieString Ids::attr_value; 0434 TrieString Ids::attr_fill; 0435 TrieString Ids::attr_fit; 0436 0437 void Ids::init() { 0438 attr_width = "width"; 0439 attr_value = "value"; 0440 attr_url = "url"; 0441 attr_type = "type"; 0442 attr_top = "top"; 0443 attr_title = "title"; 0444 attr_target = "target"; 0445 attr_src = "src"; 0446 attr_right = "right"; 0447 attr_region = "region"; 0448 attr_name = "name"; 0449 attr_left = "left"; 0450 attr_id = "id"; 0451 attr_href = "href"; 0452 attr_height = "height"; 0453 attr_fit = "fit"; 0454 attr_fill = "fill"; 0455 attr_end = "end"; 0456 attr_dur = "dur"; 0457 attr_bottom = "bottom"; 0458 attr_begin = "begin"; 0459 } 0460 0461 void Ids::reset() { 0462 attr_id.clear (); 0463 attr_name.clear (); 0464 attr_src.clear (); 0465 attr_url.clear (); 0466 attr_href.clear (); 0467 attr_width.clear (); 0468 attr_height.clear (); 0469 attr_top.clear (); 0470 attr_left.clear (); 0471 attr_bottom.clear (); 0472 attr_right.clear (); 0473 attr_title.clear (); 0474 attr_begin.clear (); 0475 attr_dur.clear (); 0476 attr_end.clear (); 0477 attr_region.clear (); 0478 attr_target.clear (); 0479 attr_type.clear (); 0480 attr_value.clear (); 0481 attr_fill.clear (); 0482 attr_fit.clear (); 0483 if (trieRoot()->children.size()) { 0484 qCWarning(LOG_KMPLAYER_COMMON) << "Trie not empty"; 0485 dumpTrie (); 0486 //} else { 0487 //delete root_trie; 0488 //root_trie = 0; 0489 } 0490 } 0491 0492 void KMPlayer::dumpTrie () { 0493 dump(trieRoot(), 0); 0494 } 0495 0496 #ifdef TEST_TRIE 0497 // g++ triestring.cpp -o triestring -I$QTDIR/include -L$QTDIR/lib -lqt-mt -g -DTEST_TRIE 0498 0499 int main (int, char **) { 0500 Ids::init(); 0501 { 0502 TrieString s1; 0503 TrieString s1_1(QString ("region")); 0504 s1 = s1_1; 0505 TrieString s2 (QString ("regionName")); 0506 TrieString s3 (QString ("regPoint")); 0507 TrieString s4 (QString ("regAlign")); 0508 TrieString s6 (QString ("freeze")); 0509 TrieString s7 (QString ("fit")); 0510 { 0511 TrieString s7_1 (QString ("fit")); 0512 TrieString s5 (QString ("fill")); 0513 dump (root_trie, 0); 0514 } 0515 dump (root_trie, 0); 0516 TrieString s5 (QString ("fill")); 0517 TrieString s8 (QString ("fontPtSize")); 0518 TrieString s9 (QString ("fontSize")); 0519 TrieString s10 (QString ("fontFace")); 0520 TrieString s11 (QString ("fontColor")); 0521 TrieString s12 (QString ("hAlign")); 0522 TrieString s13 (QString ("region")); 0523 TrieString s14 (QString ("ref")); 0524 TrieString s15 (QString ("head")); 0525 dump (root_trie, 0); 0526 QString qs1 = s1.toString (); 0527 QString qs2 = s2.toString (); 0528 printf ("%s\n%s\n", qs1.toLatin1(), qs2.toLatin1()); 0529 printf("equal %s %s %d\n", qs2.toLatin1(), "regionName", s2 == "regionName"); 0530 printf("equal %s %s %d\n", qs2.toLatin1(), "zegionName", s2 == "zegionName"); 0531 printf("equal %s %s %d\n", qs2.toLatin1(), "reqionName", s2 == "reqionName"); 0532 printf("equal %s %s %d\n", qs2.toLatin1(), "regiinName", s2 == "regiinName"); 0533 printf("equal %s %s %d\n", qs2.toLatin1(), "regionNeme", s2 == "regionNeme"); 0534 printf("%s < %s %d\n", qs2.toLatin1(), "regionName", s2 < TrieString("regionName")); 0535 printf("%s < %s %d\n", qs2.toLatin1(), "zegion", s2 < TrieString("zegion")); 0536 printf("%s < %s %d\n", qs2.toLatin1(), "req", s2 < TrieString("req")); 0537 printf("%s < %s %d\n", qs2.toLatin1(), "regiinName", s2 < TrieString("regiinName")); 0538 printf("%s < %s %d\n", qs2.toLatin1(), "regionNeme", s2 < TrieString("regionNeme")); 0539 printf("%s startsWith %s %d\n", s1.toString().toLatin1(), "region", s1.startsWith ("region")); 0540 printf("%s startsWith %s %d\n", qs2.toLatin1(), "region", s2.startsWith ("region")); 0541 printf("%s startsWith %s %d\n", qs2.toLatin1(), "regi", s2.startsWith ("regi")); 0542 printf("%s startsWith %s %d\n", qs2.toLatin1(), "regian", s2.startsWith ("regian")); 0543 printf("%s startsWith %s %d\n", qs2.toLatin1(), "regio", s2.startsWith ("regio")); 0544 printf("%s startsWith %s %d\n", qs2.toLatin1(), "zegio", s2.startsWith ("zegio")); 0545 printf("%s startsWith %s %d\n", qs2.toLatin1(), "r", s2.startsWith ("r")); 0546 printf("%s startsWith %s %d\n", qs2.toLatin1(), "q", s2.startsWith ("q")); 0547 TrieString fnt ("font"); 0548 printf("%s startsWith %s %d\n", s8.toString().toLatin1(), fnt.toString().toLatin1(), s8.startsWith(fnt)); 0549 printf("%s startsWith %s %d\n", s8.toString().toLatin1(), s14.toString().toLatin1(), s8.startsWith(s14)); 0550 } 0551 dump (root_trie, 0); 0552 Ids::reset(); 0553 return 0; 0554 } 0555 #endif