Warning, file /frameworks/khtml/src/xml/dom_stringimpl.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /** 0002 * This file is part of the DOM implementation for KDE. 0003 * 0004 * Copyright (C) 1999-2003 Lars Knoll (knoll@kde.org) 0005 * (C) 1999 Antti Koivisto (koivisto@kde.org) 0006 * (C) 2001-2003 Dirk Mueller ( mueller@kde.org ) 0007 * (C) 2002, 2004 Apple Computer, Inc. 0008 * 0009 * This library is free software; you can redistribute it and/or 0010 * modify it under the terms of the GNU Library General Public 0011 * License as published by the Free Software Foundation; either 0012 * version 2 of the License, or (at your option) any later version. 0013 * 0014 * This library is distributed in the hope that it will be useful, 0015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0017 * Library General Public License for more details. 0018 * 0019 * You should have received a copy of the GNU Library General Public License 0020 * along with this library; see the file COPYING.LIB. If not, write to 0021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 0022 * Boston, MA 02110-1301, USA. 0023 * 0024 */ 0025 0026 #include "dom_stringimpl.h" 0027 0028 #include <string.h> 0029 #include <QMutableStringListIterator> 0030 #include "misc/AtomicString.h" 0031 0032 using namespace DOM; 0033 using namespace khtml; 0034 0035 DOMStringImpl::DOMStringImpl(const char *str) : m_hash(0), m_inTable(0), m_shallowCopy(0) 0036 { 0037 if (str && *str) { 0038 l = strlen(str); 0039 s = QT_ALLOC_QCHAR_VEC(l); 0040 int i = l; 0041 QChar *ptr = s; 0042 while (i--) { 0043 *ptr++ = *str++; 0044 } 0045 } else { 0046 s = QT_ALLOC_QCHAR_VEC(1); // crash protection 0047 s[0] = 0x0; // == QChar::null; 0048 l = 0; 0049 } 0050 } 0051 0052 DOMStringImpl::DOMStringImpl(const char *str, uint len) : m_hash(0), m_inTable(0), m_shallowCopy(0) 0053 { 0054 if (str && *str) { 0055 l = len; 0056 s = QT_ALLOC_QCHAR_VEC(l); 0057 int i = l; 0058 QChar *ptr = s; 0059 while (i--) { 0060 *ptr++ = *str++; 0061 } 0062 } else { 0063 s = QT_ALLOC_QCHAR_VEC(1); // crash protection 0064 s[0] = 0x0; // == QChar::null; 0065 l = 0; 0066 } 0067 } 0068 0069 DOMStringImpl::DOMStringImpl(const char *str, unsigned len/*gth*/, unsigned hash) : m_hash(hash), m_inTable(true), m_shallowCopy(0) 0070 { 0071 if (str && *str) { 0072 l = len; 0073 s = QT_ALLOC_QCHAR_VEC(l); 0074 int i = l; 0075 QChar *ptr = s; 0076 while (i--) { 0077 *ptr++ = *str++; 0078 } 0079 } else { 0080 s = QT_ALLOC_QCHAR_VEC(1); // crash protection 0081 s[0] = 0x0; // == QChar::null; 0082 l = 0; 0083 } 0084 } 0085 0086 DOMStringImpl::~DOMStringImpl() 0087 { 0088 if (m_shallowCopy) { 0089 return; 0090 } 0091 if (m_inTable) { 0092 khtml::AtomicString::remove(this); 0093 } 0094 if (s) { 0095 QT_DELETE_QCHAR_VEC(s); 0096 } 0097 } 0098 0099 // FIXME: should be a cached flag maybe. 0100 bool DOMStringImpl::containsOnlyWhitespace() const 0101 { 0102 if (!s) { 0103 return true; 0104 } 0105 0106 for (uint i = 0; i < l; i++) { 0107 QChar c = s[i]; 0108 if (c.unicode() <= 0x7F) { 0109 if (c.unicode() > ' ') { 0110 return false; 0111 } 0112 } else { 0113 if (c.direction() != QChar::DirWS) { 0114 return false; 0115 } 0116 } 0117 } 0118 return true; 0119 } 0120 0121 void DOMStringImpl::append(DOMStringImpl *str) 0122 { 0123 if (str && str->l != 0) { 0124 int newlen = l + str->l; 0125 QChar *c = QT_ALLOC_QCHAR_VEC(newlen); 0126 memcpy(c, s, l * sizeof(QChar)); 0127 memcpy(c + l, str->s, str->l * sizeof(QChar)); 0128 if (s) { 0129 QT_DELETE_QCHAR_VEC(s); 0130 } 0131 s = c; 0132 l = newlen; 0133 } 0134 } 0135 0136 void DOMStringImpl::insert(DOMStringImpl *str, unsigned int pos) 0137 { 0138 if (pos > l) { 0139 append(str); 0140 return; 0141 } 0142 if (str && str->l != 0) { 0143 int newlen = l + str->l; 0144 QChar *c = QT_ALLOC_QCHAR_VEC(newlen); 0145 memcpy(c, s, pos * sizeof(QChar)); 0146 memcpy(c + pos, str->s, str->l * sizeof(QChar)); 0147 memcpy(c + pos + str->l, s + pos, (l - pos)*sizeof(QChar)); 0148 if (s) { 0149 QT_DELETE_QCHAR_VEC(s); 0150 } 0151 s = c; 0152 l = newlen; 0153 } 0154 } 0155 0156 void DOMStringImpl::truncate(int len) 0157 { 0158 if (len > (int)l) { 0159 return; 0160 } 0161 0162 int nl = len < 1 ? 1 : len; 0163 QChar *c = QT_ALLOC_QCHAR_VEC(nl); 0164 memcpy(c, s, nl * sizeof(QChar)); 0165 if (s) { 0166 QT_DELETE_QCHAR_VEC(s); 0167 } 0168 s = c; 0169 l = len; 0170 } 0171 0172 void DOMStringImpl::remove(unsigned int pos, int len) 0173 { 0174 if (pos >= l) { 0175 return; 0176 } 0177 if (pos + len > l) { 0178 len = l - pos; 0179 } 0180 0181 uint newLen = l - len; 0182 QChar *c = QT_ALLOC_QCHAR_VEC(newLen); 0183 memcpy(c, s, pos * sizeof(QChar)); 0184 memcpy(c + pos, s + pos + len, (l - len - pos)*sizeof(QChar)); 0185 if (s) { 0186 QT_DELETE_QCHAR_VEC(s); 0187 } 0188 s = c; 0189 l = newLen; 0190 } 0191 0192 DOMStringImpl *DOMStringImpl::split(unsigned int pos) 0193 { 0194 if (pos >= l) { 0195 return new DOMStringImpl(); 0196 } 0197 0198 uint newLen = l - pos; 0199 DOMStringImpl *str = new DOMStringImpl(s + pos, newLen); 0200 truncate(pos); 0201 return str; 0202 } 0203 0204 DOMStringImpl *DOMStringImpl::substring(unsigned int pos, unsigned int len) 0205 { 0206 if (pos >= l) { 0207 return new DOMStringImpl(); 0208 } 0209 if (len == UINT_MAX || pos + len > l) { 0210 len = l - pos; 0211 } 0212 0213 return new DOMStringImpl(s + pos, len); 0214 } 0215 0216 // Collapses white-space according to CSS 2.1 rules 0217 DOMStringImpl *DOMStringImpl::collapseWhiteSpace(bool preserveLF, bool preserveWS) 0218 { 0219 if (preserveLF && preserveWS) { 0220 return this; 0221 } 0222 0223 // Notice we are likely allocating more space than needed (worst case) 0224 QChar *n = QT_ALLOC_QCHAR_VEC(l); 0225 0226 unsigned int pos = 0; 0227 bool collapsing = false; // collapsing white-space 0228 bool collapsingLF = false; // collapsing around linefeed 0229 bool changedLF = false; 0230 for (unsigned int i = 0; i < l; i++) { 0231 ushort ch = s[i].unicode(); 0232 0233 // We act on \r as we would on \n because CSS uses it to indicate new-line 0234 if (ch == '\r') { 0235 ch = '\n'; 0236 } else 0237 // ### The XML parser lets \t through, for now treat them as spaces 0238 if (ch == '\t') { 0239 ch = ' '; 0240 } 0241 0242 if (!preserveLF && ch == '\n') { 0243 // ### Not strictly correct according to CSS3 text-module. 0244 // - In ideographic languages linefeed should be ignored 0245 // - and in Thai and Khmer it should be treated as a zero-width space 0246 ch = ' '; // Treat as space 0247 changedLF = true; 0248 } 0249 0250 if (collapsing) { 0251 if (ch == ' ') { 0252 continue; 0253 } 0254 if (ch == '\n') { 0255 collapsingLF = true; 0256 continue; 0257 } 0258 0259 n[pos++] = (collapsingLF) ? QLatin1Char('\n') : QLatin1Char(' '); 0260 collapsing = false; 0261 collapsingLF = false; 0262 } else if (!preserveWS && ch == ' ') { 0263 collapsing = true; 0264 continue; 0265 } else if (!preserveWS && ch == '\n') { 0266 collapsing = true; 0267 collapsingLF = true; 0268 continue; 0269 } 0270 0271 n[pos++] = ch; 0272 } 0273 if (collapsing) { 0274 n[pos++] = ((collapsingLF) ? QLatin1Char('\n') : QLatin1Char(' ')); 0275 } 0276 0277 if (pos == l && !changedLF) { 0278 QT_DELETE_QCHAR_VEC(n); 0279 return this; 0280 } else { 0281 DOMStringImpl *out = new DOMStringImpl(); 0282 out->s = n; 0283 out->l = pos; 0284 0285 return out; 0286 } 0287 } 0288 0289 static Length parseLength(const QChar *s, unsigned int l) 0290 { 0291 if (l == 0) { 0292 return Length(1, Relative); 0293 } 0294 0295 unsigned i = 0; 0296 while (i < l && s[i].isSpace()) { 0297 ++i; 0298 } 0299 if (i < l && (s[i] == '+' || s[i] == '-')) { 0300 ++i; 0301 } 0302 while (i < l && s[i].isDigit()) { 0303 ++i; 0304 } 0305 0306 bool ok; 0307 int r = QString::fromRawData(s, i).toInt(&ok); 0308 0309 /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */ 0310 while (i < l && (s[i].isDigit() || s[i] == '.')) { 0311 ++i; 0312 } 0313 0314 /* IE Quirk: Skip any whitespace (20 % => 20%) */ 0315 while (i < l && s[i].isSpace()) { 0316 ++i; 0317 } 0318 0319 if (ok) { 0320 if (i == l) { 0321 return Length(r, Fixed); 0322 } else { 0323 const QChar *next = s + i; 0324 0325 if (*next == '%') { 0326 return Length(static_cast<double>(r), Percent); 0327 } 0328 0329 if (*next == '*') { 0330 return Length(r, Relative); 0331 } 0332 } 0333 return Length(r, Fixed); 0334 } else { 0335 if (i < l) { 0336 const QChar *next = s + i; 0337 0338 if (*next == '*') { 0339 return Length(1, Relative); 0340 } 0341 0342 if (*next == '%') { 0343 return Length(1, Relative); 0344 } 0345 } 0346 } 0347 return Length(0, Relative); 0348 } 0349 0350 khtml::Length *DOMStringImpl::toCoordsArray(int &len) const 0351 { 0352 QString str(s, l); 0353 for (unsigned int i = 0; i < l; i++) { 0354 QChar cc = s[i]; 0355 if (cc > '9' || (cc < '0' && cc != '-' && cc != '*' && cc != '.')) { 0356 str[i] = ' '; 0357 } 0358 } 0359 str = str.simplified(); 0360 0361 len = str.count(' ') + 1; 0362 khtml::Length *r = new khtml::Length[len]; 0363 0364 int j = 0; 0365 int pos = 0; 0366 int pos2; 0367 0368 while ((pos2 = str.indexOf(QLatin1Char(' '), pos)) != -1) { 0369 r[j++] = parseLength((QChar *) str.unicode() + pos, pos2 - pos); 0370 pos = pos2 + 1; 0371 } 0372 r[j] = parseLength((QChar *) str.unicode() + pos, str.length() - pos); 0373 0374 return r; 0375 } 0376 0377 khtml::Length *DOMStringImpl::toLengthArray(int &len) const 0378 { 0379 QString str(s, l); 0380 str = str.simplified(); 0381 0382 len = str.count(QLatin1Char(',')) + 1; 0383 0384 // If we have no commas, we have no array. 0385 if (len == 1) { 0386 return nullptr; 0387 } 0388 0389 khtml::Length *r = new khtml::Length[len]; 0390 0391 int i = 0; 0392 int pos = 0; 0393 int pos2; 0394 0395 while ((pos2 = str.indexOf(QLatin1Char(','), pos)) != -1) { 0396 r[i++] = parseLength((QChar *) str.unicode() + pos, pos2 - pos); 0397 pos = pos2 + 1; 0398 } 0399 0400 /* IE Quirk: If the last comma is the last char skip it and reduce len by one */ 0401 if (str.length() - pos > 0) { 0402 r[i] = parseLength((QChar *) str.unicode() + pos, str.length() - pos); 0403 } else { 0404 len--; 0405 } 0406 0407 return r; 0408 } 0409 0410 bool DOMStringImpl::isLower() const 0411 { 0412 unsigned int i; 0413 for (i = 0; i < l; i++) 0414 if (s[i].toLower() != s[i]) { 0415 return false; 0416 } 0417 return true; 0418 } 0419 0420 DOMStringImpl *DOMStringImpl::lower() const 0421 { 0422 DOMStringImpl *c = new DOMStringImpl; 0423 if (!l) { 0424 return c; 0425 } 0426 0427 c->s = QT_ALLOC_QCHAR_VEC(l); 0428 c->l = l; 0429 0430 for (unsigned int i = 0; i < l; i++) { 0431 c->s[i] = s[i].toLower(); 0432 } 0433 0434 return c; 0435 } 0436 0437 DOMStringImpl *DOMStringImpl::upper() const 0438 { 0439 DOMStringImpl *c = new DOMStringImpl; 0440 if (!l) { 0441 return c; 0442 } 0443 0444 c->s = QT_ALLOC_QCHAR_VEC(l); 0445 c->l = l; 0446 0447 for (unsigned int i = 0; i < l; i++) { 0448 c->s[i] = s[i].toUpper(); 0449 } 0450 0451 return c; 0452 } 0453 0454 DOMStringImpl *DOMStringImpl::capitalize(bool noFirstCap) const 0455 { 0456 bool canCapitalize = !noFirstCap; 0457 DOMStringImpl *c = new DOMStringImpl; 0458 if (!l) { 0459 return c; 0460 } 0461 0462 c->s = QT_ALLOC_QCHAR_VEC(l); 0463 c->l = l; 0464 0465 for (unsigned int i = 0; i < l; i++) { 0466 if (s[i].isLetterOrNumber() && canCapitalize) { 0467 c->s[i] = s[i].toUpper(); 0468 canCapitalize = false; 0469 } else { 0470 c->s[i] = s[i]; 0471 if (s[i].isSpace()) { 0472 canCapitalize = true; 0473 } 0474 } 0475 } 0476 0477 return c; 0478 } 0479 0480 QString DOMStringImpl::string() const 0481 { 0482 return QString(s, l); 0483 } 0484 0485 int DOMStringImpl::toInt(bool *ok) const 0486 { 0487 // match \s*[+-]?\d* 0488 unsigned i = 0; 0489 while (i < l && s[i].isSpace()) { 0490 ++i; 0491 } 0492 if (i < l && (s[i] == '+' || s[i] == '-')) { 0493 ++i; 0494 } 0495 while (i < l && s[i].isDigit()) { 0496 ++i; 0497 } 0498 0499 return QString::fromRawData(s, i).toInt(ok); 0500 } 0501 0502 float DOMStringImpl::toFloat(bool *ok) const 0503 { 0504 return QString::fromRawData(s, l).toFloat(ok); 0505 } 0506 0507 bool DOMStringImpl::endsWith(DOMStringImpl *str, CaseSensitivity cs) const 0508 { 0509 if (l < str->l) { 0510 return false; 0511 } 0512 const QChar *a = s + l - 1; 0513 const QChar *b = str->s + str->l - 1; 0514 int i = str->l; 0515 if (cs == CaseSensitive) { 0516 while (i--) { 0517 if (*a != *b) { 0518 return false; 0519 } 0520 a--, b--; 0521 } 0522 } else { 0523 while (i--) { 0524 if (a->toLower() != b->toLower()) { 0525 return false; 0526 } 0527 a--, b--; 0528 } 0529 } 0530 return true; 0531 } 0532 0533 bool DOMStringImpl::startsWith(DOMStringImpl *str, CaseSensitivity cs) const 0534 { 0535 if (l < str->l) { 0536 return false; 0537 } 0538 const QChar *a = s; 0539 const QChar *b = str->s; 0540 int i = str->l; 0541 if (cs == CaseSensitive) { 0542 while (i--) { 0543 if (*a != *b) { 0544 return false; 0545 } 0546 a++, b++; 0547 } 0548 } else { 0549 while (i--) { 0550 if (a->toLower() != b->toLower()) { 0551 return false; 0552 } 0553 a++, b++; 0554 } 0555 } 0556 return true; 0557 } 0558 0559 DOMStringImpl *DOMStringImpl::substring(unsigned pos, unsigned len) const 0560 { 0561 if (pos >= l) { 0562 return nullptr; 0563 } 0564 if (len > l - pos) { 0565 len = l - pos; 0566 } 0567 return new DOMStringImpl(s + pos, len); 0568 } 0569 0570 static const unsigned short amp[] = {'&', 'a', 'm', 'p', ';'}; 0571 static const unsigned short lt[] = {'&', 'l', 't', ';'}; 0572 static const unsigned short gt[] = {'&', 'g', 't', ';'}; 0573 static const unsigned short nbsp[] = {'&', 'n', 'b', 's', 'p', ';'}; 0574 0575 DOMStringImpl *DOMStringImpl::escapeHTML() 0576 { 0577 unsigned outL = 0; 0578 for (unsigned int i = 0; i < l; ++i) { 0579 if (s[i] == '&') { 0580 outL += 5; //& 0581 } else if (s[i] == '<' || s[i] == '>') { 0582 outL += 4; //>/< 0583 } else if (s[i] == QChar::Nbsp) { 0584 outL += 6; // 0585 } else { 0586 ++outL; 0587 } 0588 } 0589 if (outL == l) { 0590 return this; 0591 } 0592 0593 DOMStringImpl *toRet = new DOMStringImpl(); 0594 toRet->s = QT_ALLOC_QCHAR_VEC(outL); 0595 toRet->l = outL; 0596 0597 unsigned outP = 0; 0598 for (unsigned int i = 0; i < l; ++i) { 0599 if (s[i] == '&') { 0600 memcpy(&toRet->s[outP], amp, sizeof(amp)); 0601 outP += 5; 0602 } else if (s[i] == '<') { 0603 memcpy(&toRet->s[outP], lt, sizeof(lt)); 0604 outP += 4; 0605 } else if (s[i] == '>') { 0606 memcpy(&toRet->s[outP], gt, sizeof(gt)); 0607 outP += 4; 0608 } else if (s[i] == QChar::Nbsp) { 0609 memcpy(&toRet->s[outP], nbsp, sizeof(nbsp)); 0610 outP += 6; 0611 } else { 0612 toRet->s[outP] = s[i]; 0613 ++outP; 0614 } 0615 } 0616 return toRet; 0617 } 0618 0619 enum NoFoldTag { DoNotFold }; 0620 enum FoldLowerTag { FoldLower }; 0621 enum FoldUpperTag { FoldUpper }; 0622 0623 static inline 0624 unsigned short foldChar(unsigned short c, NoFoldTag) 0625 { 0626 return c; 0627 } 0628 0629 static inline 0630 unsigned short foldChar(unsigned short c, FoldLowerTag) 0631 { 0632 // ### fast path for first ones? 0633 return QChar::toLower(c); 0634 } 0635 0636 static inline 0637 unsigned short foldChar(unsigned short c, FoldUpperTag) 0638 { 0639 // ### fast path for first ones? 0640 return QChar::toUpper(c); 0641 } 0642 0643 // Paul Hsieh's SuperFastHash 0644 // http://www.azillionmonkeys.com/qed/hash.html 0645 0646 // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's 0647 // or anything like that. 0648 const unsigned PHI = 0x9e3779b9U; 0649 0650 template<typename FoldTag> 0651 static unsigned calcHash(const QChar *s, unsigned l, FoldTag foldMode) 0652 { 0653 // Note: this is originally from KJS 0654 unsigned hash = PHI; 0655 unsigned tmp; 0656 0657 int rem = l & 1; 0658 l >>= 1; 0659 0660 // Main loop 0661 for (; l > 0; l--) { 0662 hash += foldChar(s[0].unicode(), foldMode); 0663 tmp = (foldChar(s[1].unicode(), foldMode) << 11) ^ hash; 0664 hash = (hash << 16) ^ tmp; 0665 s += 2; 0666 hash += hash >> 11; 0667 } 0668 0669 // Handle end case 0670 if (rem) { 0671 hash += foldChar(s[0].unicode(), foldMode); 0672 hash ^= hash << 11; 0673 hash += hash >> 17; 0674 } 0675 0676 // Force "avalanching" of final 127 bits 0677 hash ^= hash << 3; 0678 hash += hash >> 5; 0679 hash ^= hash << 2; 0680 hash += hash >> 15; 0681 hash ^= hash << 10; 0682 0683 // this avoids ever returning a hash code of 0, since that is used to 0684 // signal "hash not computed yet", using a value that is likely to be 0685 // effectively the same as 0 when the low bits are masked 0686 if (hash == 0) { 0687 hash = 0x80000000; 0688 } 0689 0690 return hash; 0691 } 0692 0693 unsigned DOMStringImpl::hash() const 0694 { 0695 if (m_hash != 0) { 0696 return m_hash; 0697 } 0698 0699 return m_hash = calcHash(s, l, DoNotFold); 0700 } 0701 0702 unsigned DOMStringImpl::lowerHash() const 0703 { 0704 return calcHash(s, l, FoldLower); 0705 } 0706 0707 unsigned DOMStringImpl::upperHash() const 0708 { 0709 return calcHash(s, l, FoldUpper); 0710 } 0711 0712 unsigned DOMStringImpl::computeHash(const QChar *str, unsigned int length) 0713 { 0714 return calcHash(str, length, DoNotFold); 0715 } 0716 0717 DOMStringImpl *DOMStringImpl::empty() 0718 { 0719 static DOMString e(""); 0720 return e.implementation(); 0721 } 0722 0723 bool DOM::strcasecmp(const DOMStringImpl *a, const DOMStringImpl *b) 0724 { 0725 if (!(a && b)) { 0726 return (a && a->l) || (b && b->l); 0727 } 0728 if (a->l != b->l) { 0729 return true; 0730 } 0731 QChar *ai = a->s; 0732 QChar *bi = b->s; 0733 int l = a->l; 0734 while (l--) { 0735 if (*ai != *bi && ai->toLower() != bi->toLower()) { 0736 return true; 0737 } 0738 ++ai, ++bi; 0739 } 0740 return false; 0741 } 0742