File indexing completed on 2024-04-14 04:49:09

0001 /*
0002     This file belong to the KMPlayer project, a movie player plugin for Konqueror
0003     SPDX-FileCopyrightText: 2007 Koos Vriezen <koos.vriezen@gmail.com>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #ifdef TEST_TRIE
0009 # define KMPLAYERCOMMON_EXPORT
0010 #else
0011 # include <config-kmplayer.h>
0012 #endif
0013 #include <cassert>
0014 #include <cstdio>
0015 #include <cstdlib>
0016 #include <vector>
0017 
0018 #include "kmplayercommon_log.h"
0019 #include "triestring.h"
0020 
0021 namespace KMPlayer {
0022 
0023 struct TrieNode
0024 {
0025     enum { MaxPacked = sizeof (void*) };
0026 
0027     TrieNode() : ref_count(0), length(0), parent(nullptr), buffer(nullptr)
0028     {}
0029     TrieNode(TrieNode* p, const char* s, size_t len)
0030         : ref_count(0), length(0)
0031     {
0032         update(p, s, len);
0033     }
0034     ~TrieNode()
0035     {
0036         if (length > MaxPacked)
0037             free(buffer);
0038     }
0039     void update(TrieNode* p, const char* s, size_t len)
0040     {
0041         char* old = length > MaxPacked ? buffer : nullptr;
0042         parent = p;
0043         length = len;
0044         if (len <= MaxPacked) {
0045             if ((unsigned)::abs(s - packed) > len)
0046                 memcpy(packed, s, len);
0047             else
0048                 memmove(packed, s, len);
0049         } else {
0050             buffer = (char*)malloc(len);
0051             memcpy(buffer, s, len);
0052         }
0053         if (old)
0054             free(old);
0055     }
0056 
0057     int ref_count;
0058     unsigned length;
0059     TrieNode* parent;
0060     std::vector<TrieNode*> children;
0061 
0062     union {
0063         char packed[MaxPacked];
0064         char* buffer;
0065     };
0066 };
0067 
0068 }
0069 
0070 using namespace KMPlayer;
0071 
0072 static char* trieCharPtr(TrieNode* n)
0073 {
0074     return n->length <= TrieNode::MaxPacked ? n->packed : n->buffer;
0075 }
0076 
0077 static char* trieRetrieveString (TrieNode* node, int& len)
0078 {
0079     char *buf;
0080     if (node->parent) {
0081         len += node->length;
0082         int p = len;
0083         buf = trieRetrieveString(node->parent, len);
0084         memcpy(buf+len-p, trieCharPtr(node), node->length);
0085     } else {
0086         buf = (char*)malloc(len + 1);
0087         buf[len] = 0;
0088     }
0089     return buf;
0090 }
0091 
0092 static TrieNode* trieRoot()
0093 {
0094     static TrieNode* trie_root;
0095     if (!trie_root)
0096         trie_root = new TrieNode();
0097     return trie_root;
0098 }
0099 
0100 static void dump(TrieNode* n, int indent)
0101 {
0102     for (int i =0; i < indent; ++i)
0103         fprintf(stderr, " ");
0104     fprintf(stderr, "'");
0105     for (unsigned i = 0; i < n->length; ++i)
0106         fprintf(stderr, "%c", trieCharPtr(n)[i]);
0107     fprintf(stderr, "'\n");
0108     for (unsigned i = 0; i < n->children.size(); ++i)
0109         dump(n->children[i], indent+2);
0110 }
0111 
0112 static int trieSameGenerationCompare(TrieNode* n1, TrieNode* n2)
0113 {
0114     if (n1->parent == n2->parent)
0115         return memcmp(trieCharPtr(n1), trieCharPtr(n2), std::min(n1->length, n2->length));
0116     return trieSameGenerationCompare(n1->parent, n2->parent);
0117 }
0118 
0119 static int trieCompare(TrieNode* n1, TrieNode* n2)
0120 {
0121     if (n1 == n2)
0122         return 0;
0123     int depth1 = 0, depth2 = 0;
0124     for (TrieNode* n = n1; n; n = n->parent)
0125         depth1++;
0126     if (!depth1)
0127         return n2 ? -1 : 0;
0128     for (TrieNode* n = n2; n; n = n->parent)
0129         depth2++;
0130     if (!depth2)
0131         return 1;
0132     if (depth1 != depth2) {
0133         int dcmp = depth1 > depth2 ? 1 : -1;
0134         for (; depth1 > depth2; --depth1)
0135             n1 = n1->parent;
0136         for (; depth2 > depth1; --depth2)
0137             n2 = n2->parent;
0138         if (n1 == n2)
0139             return dcmp;
0140     }
0141     return trieSameGenerationCompare(n1, n2);
0142 }
0143 
0144 static int trieStringCompare(TrieNode* node, const char* s, int& pos, int len)
0145 {
0146     int cmp = 0;
0147     if (node->parent)
0148         cmp = trieStringCompare (node->parent, s, pos, len);
0149     if (!cmp) {
0150         if (pos > len)
0151             return 1;
0152         if (pos == len)
0153             return node->length ? 1 : 0;
0154         if (len - pos < static_cast<int>(node->length)) {
0155             cmp = memcmp(trieCharPtr(node), s + pos, len - pos);
0156             if (!cmp)
0157                 cmp = 1;
0158         } else {
0159             cmp = memcmp(trieCharPtr(node), s + pos, node->length);
0160         }
0161         pos += node->length;
0162     }
0163     return cmp;
0164 }
0165 
0166 static int trieStringCompare(TrieNode* node, const char* s)
0167 {
0168     if (!node)
0169         return !!s;
0170     if (!s)
0171         return 1;
0172     int pos = 0;
0173     int len = strlen(s);
0174     int cmp = trieStringCompare(node, s, pos, len);
0175 #ifdef TEST_TRIE
0176     fprintf(stderr, "== %s -> (%d %d) %d\n", s, pos, len, cmp);
0177 #endif
0178     if (cmp)
0179         return cmp;
0180     if (pos == len)
0181         return 0;
0182     return pos < len ? -1 : 1;
0183 }
0184 
0185 //first index in range [first,last) which does not compare less than c
0186 static int trieLowerBound(const TrieNode* n, int begin, int end, char c)
0187 {
0188     if (begin == end)
0189         return end;
0190     if (begin == end - 1)
0191         return trieCharPtr(n->children[begin])[0] >= c ? begin : end;
0192     int i = (begin + end)/2;
0193     char c1 = trieCharPtr(n->children[i])[0];
0194     if (c == c1)
0195         return i;
0196     if (c < c1)
0197         return trieLowerBound(n, begin, i, c);
0198     return trieLowerBound(n, i + 1, end, c);
0199 }
0200 
0201 static TrieNode* trieInsert(TrieNode* parent, const char* s, size_t len)
0202 {
0203     TrieNode* node;
0204 
0205     if (!*s)
0206         return parent;
0207 
0208     unsigned idx = trieLowerBound(parent, 0, parent->children.size(), s[0]);
0209     if (idx < parent->children.size()) {
0210         node = parent->children[idx];
0211         char* s2 = trieCharPtr(node);
0212         if (s[0] == s2[0]) {
0213             if (node->length == len
0214                     && !memcmp((void*)s, trieCharPtr(node), len)) {
0215                 return node;
0216             }
0217             for (unsigned i = 1; i < node->length; ++i) {
0218                 if (i == len) {
0219                     TrieNode* rep = new TrieNode(parent, s, i);
0220                     parent->children[idx] = rep;
0221                     node->update(rep, s2 + i, node->length - i);
0222                     rep->children.push_back(node);
0223                     return rep;
0224                 } else if (s[i] != s2[i]) {
0225                     TrieNode* rep = new TrieNode(parent, s2, i);
0226                     TrieNode* child = new TrieNode(rep, s + i, len - i);
0227                     bool cmp = s2[i] < s[i];
0228                     node->update(rep, s2 + i, node->length - i);
0229                     if (cmp) {
0230                         rep->children.push_back(node);
0231                         rep->children.push_back(child);
0232                     } else {
0233                         rep->children.push_back(child);
0234                         rep->children.push_back(node);
0235                     }
0236                     parent->children[idx] = rep;
0237                     return child;
0238                 }
0239             }
0240             return trieInsert(node, s + node->length, len - node->length);
0241         } else if (s[0] < s2[0]) {
0242             node = new TrieNode(parent, s, len);
0243             parent->children.insert(parent->children.begin()+idx, node);
0244             return node;
0245         }
0246     }
0247     node = new TrieNode(parent, s, len);
0248     parent->children.push_back(node);
0249     return node;
0250 }
0251 
0252 static void trieRemove(TrieNode* node)
0253 {
0254     if (node->children.size() > 1)
0255         return;
0256     TrieNode* parent = node->parent;
0257     if (!parent)
0258         return;
0259     char* s = trieCharPtr(node);
0260     assert(*s);
0261     unsigned idx = trieLowerBound(parent, 0, parent->children.size(), s[0]);
0262     assert(parent->children[idx] == node);
0263     if (node->children.size()) {
0264         TrieNode* child = node->children[0];
0265         char* s1 = (char*)malloc(child->length + node->length);
0266         memcpy(s1, s, node->length);
0267         memcpy(s1 + node->length, trieCharPtr(child), child->length);
0268         child->update(parent, s1, child->length + node->length);
0269         free(s1);
0270         parent->children[idx] = child;
0271         delete node;
0272     } else {
0273         parent->children.erase(parent->children.begin() + idx);
0274         delete node;
0275         if (!parent->ref_count)
0276             trieRemove(parent);
0277     }
0278 }
0279 
0280 static int trieStringStarts(TrieNode* node, const char* s, int& pos)
0281 {
0282     int cmp = -1; // -1 still matches, 0 no, 1 yes
0283     if (node->parent)
0284         cmp = trieStringStarts(node->parent, s, pos);
0285     if (cmp == -1) {
0286         char* s1 = trieCharPtr(node);
0287         for (unsigned i = 0; i < node->length; ++i)
0288             if (s1[i] != s[pos + i])
0289                 return !s[pos + i] ? 1 : 0;
0290         pos += node->length;
0291     }
0292     return cmp;
0293 }
0294 
0295 TrieString::TrieString (const QString& s) : node(nullptr)
0296 {
0297     if (!s.isNull()) {
0298         const QByteArray ba = s.toUtf8();
0299         node = trieInsert(trieRoot(), ba.constData(), ba.length());
0300         ++node->ref_count;
0301     }
0302 }
0303 
0304 TrieString::TrieString(const char* s)
0305     : node(!s ? nullptr : trieInsert(trieRoot(), s, strlen(s)))
0306 {
0307     if (node)
0308         ++node->ref_count;
0309 }
0310 
0311 TrieString::TrieString(const char* s, int len)
0312     : node(!s ? nullptr : trieInsert(trieRoot(), s, len))
0313 {
0314     if (node)
0315         ++node->ref_count;
0316 }
0317 
0318 TrieString::TrieString(const TrieString& s) : node(s.node)
0319 {
0320     if (node)
0321         ++node->ref_count;
0322 }
0323 
0324 TrieString::~TrieString()
0325 {
0326     if (node && !--node->ref_count) {
0327 #ifdef TEST_TRIE
0328         fprintf(stderr, "delete %s\n", qPrintable(toString()));
0329 #endif
0330         trieRemove(node);
0331     }
0332 }
0333 
0334 TrieString& TrieString::operator=(const char* s)
0335 {
0336     if (node && !--node->ref_count) {
0337 #ifdef TEST_TRIE
0338         fprintf(stderr, "= delete %s\n", qPrintable(toString()));
0339 #endif
0340         trieRemove(node);
0341     }
0342     node = !s ? nullptr : trieInsert(trieRoot(), s, strlen(s));
0343     if (node)
0344         ++node->ref_count;
0345     return *this;
0346 }
0347 
0348 TrieString& TrieString::operator=(const TrieString& s)
0349 {
0350     if (s.node != node) {
0351         if (s.node)
0352             ++s.node->ref_count;
0353         if (node && !--node->ref_count) {
0354 #ifdef TEST_TRIE
0355             fprintf(stderr, "= delete %s\n", qPrintable(toString()));
0356 #endif
0357             trieRemove(node);
0358         }
0359         node = s.node;
0360     }
0361     return *this;
0362 }
0363 
0364 bool TrieString::operator<(const TrieString& s) const
0365 {
0366     return trieCompare(node, s.node) < 0;
0367 }
0368 
0369 bool KMPlayer::operator==(const TrieString& t, const char* s)
0370 {
0371     return trieStringCompare(t.node, s) == 0;
0372 }
0373 
0374 bool TrieString::startsWith(const TrieString& s) const
0375 {
0376     for (TrieNode* n = node; n; n = n->parent)
0377         if (n == s.node)
0378             return true;
0379     return s.node ? false : true;
0380 }
0381 
0382 bool TrieString::startsWith(const char* str) const
0383 {
0384     if (!node)
0385         return !str ? true : false;
0386     if (!str)
0387         return true;
0388     int pos = 0;
0389     return trieStringStarts(node, str, pos) != 0;
0390 }
0391 
0392 QString TrieString::toString() const
0393 {
0394     if (!node)
0395         return QString();
0396     int len = 0;
0397     char* buf = trieRetrieveString(node, len);
0398     QString s = QString::fromUtf8(buf);
0399     free(buf);
0400     return s;
0401 }
0402 
0403 void TrieString::clear()
0404 {
0405     if (node && !--node->ref_count) {
0406 #ifdef TEST_TRIE
0407         fprintf(stderr, "clear delete %s\n", qPrintable(toString()));
0408 #endif
0409         trieRemove(node);
0410     }
0411     node = nullptr;
0412 }
0413 
0414 
0415 TrieString Ids::attr_id;
0416 TrieString Ids::attr_name;
0417 TrieString Ids::attr_src;
0418 TrieString Ids::attr_url;
0419 TrieString Ids::attr_href;
0420 TrieString Ids::attr_width;
0421 TrieString Ids::attr_height;
0422 TrieString Ids::attr_top;
0423 TrieString Ids::attr_left;
0424 TrieString Ids::attr_bottom;
0425 TrieString Ids::attr_right;
0426 TrieString Ids::attr_title;
0427 TrieString Ids::attr_begin;
0428 TrieString Ids::attr_dur;
0429 TrieString Ids::attr_end;
0430 TrieString Ids::attr_region;
0431 TrieString Ids::attr_target;
0432 TrieString Ids::attr_type;
0433 TrieString Ids::attr_value;
0434 TrieString Ids::attr_fill;
0435 TrieString Ids::attr_fit;
0436 
0437 void Ids::init() {
0438     attr_width = "width";
0439     attr_value = "value";
0440     attr_url = "url";
0441     attr_type = "type";
0442     attr_top = "top";
0443     attr_title = "title";
0444     attr_target = "target";
0445     attr_src = "src";
0446     attr_right = "right";
0447     attr_region = "region";
0448     attr_name = "name";
0449     attr_left = "left";
0450     attr_id = "id";
0451     attr_href = "href";
0452     attr_height = "height";
0453     attr_fit = "fit";
0454     attr_fill = "fill";
0455     attr_end = "end";
0456     attr_dur = "dur";
0457     attr_bottom = "bottom";
0458     attr_begin = "begin";
0459 }
0460 
0461 void Ids::reset() {
0462     attr_id.clear ();
0463     attr_name.clear ();
0464     attr_src.clear ();
0465     attr_url.clear ();
0466     attr_href.clear ();
0467     attr_width.clear ();
0468     attr_height.clear ();
0469     attr_top.clear ();
0470     attr_left.clear ();
0471     attr_bottom.clear ();
0472     attr_right.clear ();
0473     attr_title.clear ();
0474     attr_begin.clear ();
0475     attr_dur.clear ();
0476     attr_end.clear ();
0477     attr_region.clear ();
0478     attr_target.clear ();
0479     attr_type.clear ();
0480     attr_value.clear ();
0481     attr_fill.clear ();
0482     attr_fit.clear ();
0483     if (trieRoot()->children.size()) {
0484         qCWarning(LOG_KMPLAYER_COMMON) << "Trie not empty";
0485         dumpTrie ();
0486     //} else {
0487         //delete root_trie;
0488         //root_trie = 0;
0489     }
0490 }
0491 
0492 void KMPlayer::dumpTrie () {
0493     dump(trieRoot(), 0);
0494 }
0495 
0496 #ifdef TEST_TRIE
0497 // g++ triestring.cpp -o triestring -I$QTDIR/include -L$QTDIR/lib -lqt-mt -g -DTEST_TRIE
0498 
0499 int main (int, char **) {
0500     Ids::init();
0501     {
0502     TrieString s1;
0503     TrieString s1_1(QString ("region"));
0504     s1 = s1_1;
0505     TrieString s2 (QString ("regionName"));
0506     TrieString s3 (QString ("regPoint"));
0507     TrieString s4 (QString ("regAlign"));
0508     TrieString s6 (QString ("freeze"));
0509     TrieString s7 (QString ("fit"));
0510     {
0511     TrieString s7_1 (QString ("fit"));
0512     TrieString s5 (QString ("fill"));
0513     dump (root_trie, 0);
0514     }
0515     dump (root_trie, 0);
0516     TrieString s5 (QString ("fill"));
0517     TrieString s8 (QString ("fontPtSize"));
0518     TrieString s9 (QString ("fontSize"));
0519     TrieString s10 (QString ("fontFace"));
0520     TrieString s11 (QString ("fontColor"));
0521     TrieString s12 (QString ("hAlign"));
0522     TrieString s13 (QString ("region"));
0523     TrieString s14 (QString ("ref"));
0524     TrieString s15 (QString ("head"));
0525     dump (root_trie, 0);
0526     QString qs1 = s1.toString ();
0527     QString qs2 = s2.toString ();
0528     printf ("%s\n%s\n", qs1.toLatin1(), qs2.toLatin1());
0529     printf("equal %s %s %d\n", qs2.toLatin1(), "regionName", s2 == "regionName");
0530     printf("equal %s %s %d\n", qs2.toLatin1(), "zegionName", s2 == "zegionName");
0531     printf("equal %s %s %d\n", qs2.toLatin1(), "reqionName", s2 == "reqionName");
0532     printf("equal %s %s %d\n", qs2.toLatin1(), "regiinName", s2 == "regiinName");
0533     printf("equal %s %s %d\n", qs2.toLatin1(), "regionNeme", s2 == "regionNeme");
0534     printf("%s < %s %d\n", qs2.toLatin1(), "regionName", s2 < TrieString("regionName"));
0535     printf("%s < %s %d\n", qs2.toLatin1(), "zegion", s2 < TrieString("zegion"));
0536     printf("%s < %s %d\n", qs2.toLatin1(), "req", s2 < TrieString("req"));
0537     printf("%s < %s %d\n", qs2.toLatin1(), "regiinName", s2 < TrieString("regiinName"));
0538     printf("%s < %s %d\n", qs2.toLatin1(), "regionNeme", s2 < TrieString("regionNeme"));
0539     printf("%s startsWith %s %d\n", s1.toString().toLatin1(), "region", s1.startsWith ("region"));
0540     printf("%s startsWith %s %d\n", qs2.toLatin1(), "region", s2.startsWith ("region"));
0541     printf("%s startsWith %s %d\n", qs2.toLatin1(), "regi", s2.startsWith ("regi"));
0542     printf("%s startsWith %s %d\n", qs2.toLatin1(), "regian", s2.startsWith ("regian"));
0543     printf("%s startsWith %s %d\n", qs2.toLatin1(), "regio", s2.startsWith ("regio"));
0544     printf("%s startsWith %s %d\n", qs2.toLatin1(), "zegio", s2.startsWith ("zegio"));
0545     printf("%s startsWith %s %d\n", qs2.toLatin1(), "r", s2.startsWith ("r"));
0546     printf("%s startsWith %s %d\n", qs2.toLatin1(), "q", s2.startsWith ("q"));
0547     TrieString fnt ("font");
0548     printf("%s startsWith %s %d\n", s8.toString().toLatin1(), fnt.toString().toLatin1(), s8.startsWith(fnt));
0549     printf("%s startsWith %s %d\n", s8.toString().toLatin1(), s14.toString().toLatin1(), s8.startsWith(s14));
0550     }
0551     dump (root_trie, 0);
0552     Ids::reset();
0553     return 0;
0554 }
0555 #endif