File indexing completed on 2024-03-24 16:22:29

0001 /**
0002   This file belong to the KMPlayer project, a movie player plugin for Konqueror
0003   Copyright (C) 2007  Koos Vriezen <koos.vriezen@gmail.com>
0004 
0005   This library is free software; you can redistribute it and/or
0006   modify it under the terms of the GNU Lesser General Public
0007   License as published by the Free Software Foundation; either
0008   version 2 of the License, or (at your option) any later version.
0009 
0010   This library is distributed in the hope that it will be useful,
0011   but WITHOUT ANY WARRANTY; without even the implied warranty of
0012   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0013   Lesser General Public License for more details.
0014 
0015   You should have received a copy of the GNU Lesser General Public
0016   License along with this library; if not, write to the Free Software
0017   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
0018 **/
0019 
0020 #ifdef TEST_TRIE
0021 # define KMPLAYER_NO_EXPORT
0022 # define KMPLAYER_EXPORT
0023 # define KDE_NO_EXPORT
0024 # define KDE_NO_CDTOR_EXPORT
0025 #else
0026 # include <config-kmplayer.h>
0027 # include "kmplayer_def.h"
0028 #endif
0029 #include <assert.h>
0030 #include <stdio.h>
0031 #include <stdlib.h>
0032 #include <vector>
0033 
0034 
0035 #include "triestring.h"
0036 
0037 namespace KMPlayer {
0038 
0039 struct TrieNode
0040 {
0041     enum { MaxPacked = sizeof (void*) };
0042 
0043     TrieNode() : ref_count(0), length(0), parent(NULL), buffer(NULL)
0044     {}
0045     TrieNode(TrieNode* p, const char* s, size_t len)
0046         : ref_count(0), length(0)
0047     {
0048         update(p, s, len);
0049     }
0050     ~TrieNode()
0051     {
0052         if (length > MaxPacked)
0053             free(buffer);
0054     }
0055     void update(TrieNode* p, const char* s, size_t len)
0056     {
0057         char* old = length > MaxPacked ? buffer : NULL;
0058         parent = p;
0059         length = len;
0060         if (len <= MaxPacked) {
0061             if ((unsigned)::abs(s - packed) > len)
0062                 memcpy(packed, s, len);
0063             else
0064                 memmove(packed, s, len);
0065         } else {
0066             buffer = (char*)malloc(len);
0067             memcpy(buffer, s, len);
0068         }
0069         if (old)
0070             free(old);
0071     }
0072 
0073     int ref_count;
0074     unsigned length;
0075     TrieNode* parent;
0076     std::vector<TrieNode*> children;
0077 
0078     union {
0079         char packed[MaxPacked];
0080         char* buffer;
0081     };
0082 };
0083 
0084 }
0085 
0086 using namespace KMPlayer;
0087 
0088 static char* trieCharPtr(TrieNode* n)
0089 {
0090     return n->length <= TrieNode::MaxPacked ? n->packed : n->buffer;
0091 }
0092 
0093 static char* trieRetrieveString (TrieNode* node, int& len)
0094 {
0095     char *buf;
0096     if (node->parent) {
0097         len += node->length;
0098         int p = len;
0099         buf = trieRetrieveString(node->parent, len);
0100         memcpy(buf+len-p, trieCharPtr(node), node->length);
0101     } else {
0102         buf = (char*)malloc(len + 1);
0103         buf[len] = 0;
0104     }
0105     return buf;
0106 }
0107 
0108 static TrieNode* trieRoot()
0109 {
0110     static TrieNode* trie_root;
0111     if (!trie_root)
0112         trie_root = new TrieNode();
0113     return trie_root;
0114 }
0115 
0116 static void dump(TrieNode* n, int indent)
0117 {
0118     for (int i =0; i < indent; ++i)
0119         fprintf(stderr, " ");
0120     fprintf(stderr, "'");
0121     for (unsigned i = 0; i < n->length; ++i)
0122         fprintf(stderr, "%c", trieCharPtr(n)[i]);
0123     fprintf(stderr, "'\n");
0124     for (unsigned i = 0; i < n->children.size(); ++i)
0125         dump(n->children[i], indent+2);
0126 }
0127 
0128 static int trieSameGenerationCompare(TrieNode* n1, TrieNode* n2)
0129 {
0130     if (n1->parent == n2->parent)
0131         return memcmp(trieCharPtr(n1), trieCharPtr(n2), std::min(n1->length, n2->length));
0132     return trieSameGenerationCompare(n1->parent, n2->parent);
0133 }
0134 
0135 static int trieCompare(TrieNode* n1, TrieNode* n2)
0136 {
0137     if (n1 == n2)
0138         return 0;
0139     int depth1 = 0, depth2 = 0;
0140     for (TrieNode* n = n1; n; n = n->parent)
0141         depth1++;
0142     if (!depth1)
0143         return n2 ? -1 : 0;
0144     for (TrieNode* n = n2; n; n = n->parent)
0145         depth2++;
0146     if (!depth2)
0147         return 1;
0148     if (depth1 != depth2) {
0149         int dcmp = depth1 > depth2 ? 1 : -1;
0150         for (; depth1 > depth2; --depth1)
0151             n1 = n1->parent;
0152         for (; depth2 > depth1; --depth2)
0153             n2 = n2->parent;
0154         if (n1 == n2)
0155             return dcmp;
0156     }
0157     return trieSameGenerationCompare(n1, n2);
0158 }
0159 
0160 static int trieStringCompare(TrieNode* node, const char* s, int& pos, int len)
0161 {
0162     int cmp = 0;
0163     if (node->parent)
0164         cmp = trieStringCompare (node->parent, s, pos, len);
0165     if (!cmp) {
0166         if (pos > len)
0167             return 1;
0168         if (pos == len)
0169             return node->length ? 1 : 0;
0170         if (len - pos < node->length) {
0171             cmp = memcmp(trieCharPtr(node), s + pos, len - pos);
0172             if (!cmp)
0173                 cmp = 1;
0174         } else {
0175             cmp = memcmp(trieCharPtr(node), s + pos, node->length);
0176         }
0177         pos += node->length;
0178     }
0179     return cmp;
0180 }
0181 
0182 static int trieStringCompare(TrieNode* node, const char* s)
0183 {
0184     if (!node)
0185         return !!s;
0186     if (!s)
0187         return 1;
0188     int pos = 0;
0189     int len = strlen(s);
0190     int cmp = trieStringCompare(node, s, pos, len);
0191 #ifdef TEST_TRIE
0192     fprintf(stderr, "== %s -> (%d %d) %d\n", s, pos, len, cmp);
0193 #endif
0194     if (cmp)
0195         return cmp;
0196     if (pos == len)
0197         return 0;
0198     return pos < len ? -1 : 1;
0199 }
0200 
0201 //first index in range [first,last) which does not compare less than c
0202 static int trieLowerBound(const TrieNode* n, int begin, int end, char c)
0203 {
0204     if (begin == end)
0205         return end;
0206     if (begin == end - 1)
0207         return trieCharPtr(n->children[begin])[0] >= c ? begin : end;
0208     int i = (begin + end)/2;
0209     char c1 = trieCharPtr(n->children[i])[0];
0210     if (c == c1)
0211         return i;
0212     if (c < c1)
0213         return trieLowerBound(n, begin, i, c);
0214     return trieLowerBound(n, i + 1, end, c);
0215 }
0216 
0217 static TrieNode* trieInsert(TrieNode* parent, const char* s, size_t len)
0218 {
0219     TrieNode* node;
0220 
0221     if (!*s)
0222         return parent;
0223 
0224     unsigned idx = trieLowerBound(parent, 0, parent->children.size(), s[0]);
0225     if (idx < parent->children.size()) {
0226         node = parent->children[idx];
0227         char* s2 = trieCharPtr(node);
0228         if (s[0] == s2[0]) {
0229             if (node->length == len
0230                     && !memcmp((void*)s, trieCharPtr(node), len)) {
0231                 return node;
0232             }
0233             for (unsigned i = 1; i < node->length; ++i) {
0234                 if (i == len) {
0235                     TrieNode* rep = new TrieNode(parent, s, i);
0236                     parent->children[idx] = rep;
0237                     node->update(rep, s2 + i, node->length - i);
0238                     rep->children.push_back(node);
0239                     return rep;
0240                 } else if (s[i] != s2[i]) {
0241                     TrieNode* rep = new TrieNode(parent, s2, i);
0242                     TrieNode* child = new TrieNode(rep, s + i, len - i);
0243                     bool cmp = s2[i] < s[i];
0244                     node->update(rep, s2 + i, node->length - i);
0245                     if (cmp) {
0246                         rep->children.push_back(node);
0247                         rep->children.push_back(child);
0248                     } else {
0249                         rep->children.push_back(child);
0250                         rep->children.push_back(node);
0251                     }
0252                     parent->children[idx] = rep;
0253                     return child;
0254                 }
0255             }
0256             return trieInsert(node, s + node->length, len - node->length);
0257         } else if (s[0] < s2[0]) {
0258             node = new TrieNode(parent, s, len);
0259             parent->children.insert(parent->children.begin()+idx, node);
0260             return node;
0261         }
0262     }
0263     node = new TrieNode(parent, s, len);
0264     parent->children.push_back(node);
0265     return node;
0266 }
0267 
0268 static void trieRemove(TrieNode* node)
0269 {
0270     if (node->children.size() > 1)
0271         return;
0272     TrieNode* parent = node->parent;
0273     if (!parent)
0274         return;
0275     char* s = trieCharPtr(node);
0276     assert(*s);
0277     unsigned idx = trieLowerBound(parent, 0, parent->children.size(), s[0]);
0278     assert(parent->children[idx] == node);
0279     if (node->children.size()) {
0280         TrieNode* child = node->children[0];
0281         char* s1 = (char*)malloc(child->length + node->length);
0282         memcpy(s1, s, node->length);
0283         memcpy(s1 + node->length, trieCharPtr(child), child->length);
0284         child->update(parent, s1, child->length + node->length);
0285         free(s1);
0286         parent->children[idx] = child;
0287         delete node;
0288     } else {
0289         parent->children.erase(parent->children.begin() + idx);
0290         delete node;
0291         if (!parent->ref_count)
0292             trieRemove(parent);
0293     }
0294 }
0295 
0296 static int trieStringStarts(TrieNode* node, const char* s, int& pos)
0297 {
0298     int cmp = -1; // -1 still matches, 0 no, 1 yes
0299     if (node->parent)
0300         cmp = trieStringStarts(node->parent, s, pos);
0301     if (cmp == -1) {
0302         char* s1 = trieCharPtr(node);
0303         for (unsigned i = 0; i < node->length; ++i)
0304             if (s1[i] != s[pos + i])
0305                 return !s[pos + i] ? 1 : 0;
0306         pos += node->length;
0307     }
0308     return cmp;
0309 }
0310 
0311 TrieString::TrieString (const QString& s) : node(NULL)
0312 {
0313     if (!s.isNull()) {
0314         const QByteArray ba = s.toUtf8();
0315         node = trieInsert(trieRoot(), ba.constData(), ba.length());
0316         ++node->ref_count;
0317     }
0318 }
0319 
0320 TrieString::TrieString(const char* s)
0321     : node(!s ? NULL : trieInsert(trieRoot(), s, strlen(s)))
0322 {
0323     if (node)
0324         ++node->ref_count;
0325 }
0326 
0327 TrieString::TrieString(const char* s, int len)
0328     : node(!s ? NULL : trieInsert(trieRoot(), s, len))
0329 {
0330     if (node)
0331         ++node->ref_count;
0332 }
0333 
0334 TrieString::TrieString(const TrieString& s) : node(s.node)
0335 {
0336     if (node)
0337         ++node->ref_count;
0338 }
0339 
0340 TrieString::~TrieString()
0341 {
0342     if (node && !--node->ref_count) {
0343 #ifdef TEST_TRIE
0344         fprintf(stderr, "delete %s\n", qPrintable(toString()));
0345 #endif
0346         trieRemove(node);
0347     }
0348 }
0349 
0350 TrieString& TrieString::operator=(const char* s)
0351 {
0352     if (node && !--node->ref_count) {
0353 #ifdef TEST_TRIE
0354         fprintf(stderr, "= delete %s\n", qPrintable(toString()));
0355 #endif
0356         trieRemove(node);
0357     }
0358     node = !s ? NULL : trieInsert(trieRoot(), s, strlen(s));
0359     if (node)
0360         ++node->ref_count;
0361     return *this;
0362 }
0363 
0364 TrieString& TrieString::operator=(const TrieString& s)
0365 {
0366     if (s.node != node) {
0367         if (s.node)
0368             ++s.node->ref_count;
0369         if (node && !--node->ref_count) {
0370 #ifdef TEST_TRIE
0371             fprintf(stderr, "= delete %s\n", qPrintable(toString()));
0372 #endif
0373             trieRemove(node);
0374         }
0375         node = s.node;
0376     }
0377     return *this;
0378 }
0379 
0380 bool TrieString::operator<(const TrieString& s) const
0381 {
0382     return trieCompare(node, s.node) < 0;
0383 }
0384 
0385 bool KMPlayer::operator==(const TrieString& t, const char* s)
0386 {
0387     return trieStringCompare(t.node, s) == 0;
0388 }
0389 
0390 bool TrieString::startsWith(const TrieString& s) const
0391 {
0392     for (TrieNode* n = node; n; n = n->parent)
0393         if (n == s.node)
0394             return true;
0395     return s.node ? false : true;
0396 }
0397 
0398 bool TrieString::startsWith(const char* str) const
0399 {
0400     if (!node)
0401         return !str ? true : false;
0402     if (!str)
0403         return true;
0404     int pos = 0;
0405     return trieStringStarts(node, str, pos) != 0;
0406 }
0407 
0408 QString TrieString::toString() const
0409 {
0410     if (!node)
0411         return QString();
0412     int len = 0;
0413     char* buf = trieRetrieveString(node, len);
0414     QString s = QString::fromUtf8(buf);
0415     free(buf);
0416     return s;
0417 }
0418 
0419 void TrieString::clear()
0420 {
0421     if (node && !--node->ref_count) {
0422 #ifdef TEST_TRIE
0423         fprintf(stderr, "clear delete %s\n", qPrintable(toString()));
0424 #endif
0425         trieRemove(node);
0426     }
0427     node = NULL;
0428 }
0429 
0430 
0431 TrieString Ids::attr_id;
0432 TrieString Ids::attr_name;
0433 TrieString Ids::attr_src;
0434 TrieString Ids::attr_url;
0435 TrieString Ids::attr_href;
0436 TrieString Ids::attr_width;
0437 TrieString Ids::attr_height;
0438 TrieString Ids::attr_top;
0439 TrieString Ids::attr_left;
0440 TrieString Ids::attr_bottom;
0441 TrieString Ids::attr_right;
0442 TrieString Ids::attr_title;
0443 TrieString Ids::attr_begin;
0444 TrieString Ids::attr_dur;
0445 TrieString Ids::attr_end;
0446 TrieString Ids::attr_region;
0447 TrieString Ids::attr_target;
0448 TrieString Ids::attr_type;
0449 TrieString Ids::attr_value;
0450 TrieString Ids::attr_fill;
0451 TrieString Ids::attr_fit;
0452 
0453 void Ids::init() {
0454     attr_width = "width";
0455     attr_value = "value";
0456     attr_url = "url";
0457     attr_type = "type";
0458     attr_top = "top";
0459     attr_title = "title";
0460     attr_target = "target";
0461     attr_src = "src";
0462     attr_right = "right";
0463     attr_region = "region";
0464     attr_name = "name";
0465     attr_left = "left";
0466     attr_id = "id";
0467     attr_href = "href";
0468     attr_height = "height";
0469     attr_fit = "fit";
0470     attr_fill = "fill";
0471     attr_end = "end";
0472     attr_dur = "dur";
0473     attr_bottom = "bottom";
0474     attr_begin = "begin";
0475 }
0476 
0477 void Ids::reset() {
0478     attr_id.clear ();
0479     attr_name.clear ();
0480     attr_src.clear ();
0481     attr_url.clear ();
0482     attr_href.clear ();
0483     attr_width.clear ();
0484     attr_height.clear ();
0485     attr_top.clear ();
0486     attr_left.clear ();
0487     attr_bottom.clear ();
0488     attr_right.clear ();
0489     attr_title.clear ();
0490     attr_begin.clear ();
0491     attr_dur.clear ();
0492     attr_end.clear ();
0493     attr_region.clear ();
0494     attr_target.clear ();
0495     attr_type.clear ();
0496     attr_value.clear ();
0497     attr_fill.clear ();
0498     attr_fit.clear ();
0499     if (trieRoot()->children.size()) {
0500         qWarning ("Trie not empty");
0501         dumpTrie ();
0502     //} else {
0503         //delete root_trie;
0504         //root_trie = 0;
0505     }
0506 }
0507 
0508 void KMPlayer::dumpTrie () {
0509     dump(trieRoot(), 0);
0510 }
0511 
0512 #ifdef TEST_TRIE
0513 // g++ triestring.cpp -o triestring -I$QTDIR/include -L$QTDIR/lib -lqt-mt -g -DTEST_TRIE
0514 
0515 int main (int, char **) {
0516     Ids::init();
0517     {
0518     TrieString s1;
0519     TrieString s1_1(QString ("region"));
0520     s1 = s1_1;
0521     TrieString s2 (QString ("regionName"));
0522     TrieString s3 (QString ("regPoint"));
0523     TrieString s4 (QString ("regAlign"));
0524     TrieString s6 (QString ("freeze"));
0525     TrieString s7 (QString ("fit"));
0526     {
0527     TrieString s7_1 (QString ("fit"));
0528     TrieString s5 (QString ("fill"));
0529     dump (root_trie, 0);
0530     }
0531     dump (root_trie, 0);
0532     TrieString s5 (QString ("fill"));
0533     TrieString s8 (QString ("fontPtSize"));
0534     TrieString s9 (QString ("fontSize"));
0535     TrieString s10 (QString ("fontFace"));
0536     TrieString s11 (QString ("fontColor"));
0537     TrieString s12 (QString ("hAlign"));
0538     TrieString s13 (QString ("region"));
0539     TrieString s14 (QString ("ref"));
0540     TrieString s15 (QString ("head"));
0541     dump (root_trie, 0);
0542     QString qs1 = s1.toString ();
0543     QString qs2 = s2.toString ();
0544     printf ("%s\n%s\n", qs1.toAscii(), qs2.toAscii());
0545     printf("equal %s %s %d\n", qs2.toAscii(), "regionName", s2 == "regionName");
0546     printf("equal %s %s %d\n", qs2.toAscii(), "zegionName", s2 == "zegionName");
0547     printf("equal %s %s %d\n", qs2.toAscii(), "reqionName", s2 == "reqionName");
0548     printf("equal %s %s %d\n", qs2.toAscii(), "regiinName", s2 == "regiinName");
0549     printf("equal %s %s %d\n", qs2.toAscii(), "regionNeme", s2 == "regionNeme");
0550     printf("%s < %s %d\n", qs2.toAscii(), "regionName", s2 < TrieString("regionName"));
0551     printf("%s < %s %d\n", qs2.toAscii(), "zegion", s2 < TrieString("zegion"));
0552     printf("%s < %s %d\n", qs2.toAscii(), "req", s2 < TrieString("req"));
0553     printf("%s < %s %d\n", qs2.toAscii(), "regiinName", s2 < TrieString("regiinName"));
0554     printf("%s < %s %d\n", qs2.toAscii(), "regionNeme", s2 < TrieString("regionNeme"));
0555     printf("%s startsWith %s %d\n", s1.toString().toAscii(), "region", s1.startsWith ("region"));
0556     printf("%s startsWith %s %d\n", qs2.toAscii(), "region", s2.startsWith ("region"));
0557     printf("%s startsWith %s %d\n", qs2.toAscii(), "regi", s2.startsWith ("regi"));
0558     printf("%s startsWith %s %d\n", qs2.toAscii(), "regian", s2.startsWith ("regian"));
0559     printf("%s startsWith %s %d\n", qs2.toAscii(), "regio", s2.startsWith ("regio"));
0560     printf("%s startsWith %s %d\n", qs2.toAscii(), "zegio", s2.startsWith ("zegio"));
0561     printf("%s startsWith %s %d\n", qs2.toAscii(), "r", s2.startsWith ("r"));
0562     printf("%s startsWith %s %d\n", qs2.toAscii(), "q", s2.startsWith ("q"));
0563     TrieString fnt ("font");
0564     printf("%s startsWith %s %d\n", s8.toString().toAscii(), fnt.toString().toAscii(), s8.startsWith(fnt));
0565     printf("%s startsWith %s %d\n", s8.toString().toAscii(), s14.toString().toAscii(), s8.startsWith(s14));
0566     }
0567     dump (root_trie, 0);
0568     Ids::reset();
0569     return 0;
0570 }
0571 #endif