File indexing completed on 2024-05-12 15:43:36
0001 /* 0002 * This file is part of the KDE libraries 0003 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 0004 * Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved. 0005 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) 0006 * 0007 * This library is free software; you can redistribute it and/or 0008 * modify it under the terms of the GNU Library General Public 0009 * License as published by the Free Software Foundation; either 0010 * version 2 of the License, or (at your option) any later version. 0011 * 0012 * This library is distributed in the hope that it will be useful, 0013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0015 * Library General Public License for more details. 0016 * 0017 * You should have received a copy of the GNU Library General Public License 0018 * along with this library; see the file COPYING.LIB. If not, write to 0019 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 0020 * Boston, MA 02110-1301, USA. 0021 * 0022 */ 0023 0024 #include "ustring.h" 0025 0026 #include <stdlib.h> 0027 #include <stdio.h> 0028 #include "wtf/DisallowCType.h" 0029 #include "wtf/ASCIICType.h" 0030 #if HAVE_STRINGS_H 0031 #include <strings.h> 0032 #endif 0033 #include <limits.h> 0034 0035 #include "operations.h" 0036 #include "function.h" 0037 #include "identifier.h" 0038 #include <math.h> 0039 #include "dtoa.h" 0040 #include "commonunicode.h" 0041 0042 0043 using std::max; 0044 0045 // GCC cstring uses these automatically, but not all implementations do. 0046 using std::strlen; 0047 using std::strcpy; 0048 using std::strncpy; 0049 using std::memset; 0050 using std::memcpy; 0051 0052 using namespace WTF; 0053 0054 namespace KJS 0055 { 0056 0057 extern const double NaN; 0058 extern const double Inf; 0059 0060 static inline size_t overflowIndicator() 0061 { 0062 return std::numeric_limits<size_t>::max(); 0063 } 0064 static inline size_t maxUChars() 0065 { 0066 // We don't want strings to get too crazy, since OOM hurts... and since we use 32-bit lengths 0067 // on 64-bit, too, keeping this small prevents overflows. 0068 return 0xFFFFFFF; 0069 } 0070 0071 static inline UChar *allocChars(size_t length) 0072 { 0073 assert(length); 0074 if (length > maxUChars()) { 0075 return nullptr; 0076 } 0077 return static_cast<UChar *>(fastMalloc(sizeof(UChar) * length)); 0078 } 0079 0080 static inline UChar *reallocChars(UChar *buffer, size_t length) 0081 { 0082 ASSERT(length); 0083 if (length > maxUChars()) { 0084 return nullptr; 0085 } 0086 return static_cast<UChar *>(fastRealloc(buffer, sizeof(UChar) * length)); 0087 } 0088 0089 CString::CString(const char *c) 0090 { 0091 length = strlen(c); 0092 data = new char[length + 1]; 0093 memcpy(data, c, length + 1); 0094 } 0095 0096 CString::CString(const char *c, size_t len) 0097 { 0098 length = len; 0099 data = new char[len + 1]; 0100 memcpy(data, c, len); 0101 data[len] = 0; 0102 } 0103 0104 CString::CString(const CString &b) 0105 { 0106 length = b.length; 0107 if (length > 0 && b.data) { 0108 data = new char[length + 1]; 0109 memcpy(data, b.data, length + 1); 0110 } else { 0111 data = nullptr; 0112 } 0113 } 0114 0115 CString::~CString() 0116 { 0117 delete [] data; 0118 } 0119 0120 CString &CString::operator=(const char *c) 0121 { 0122 if (data) { 0123 delete [] data; 0124 } 0125 length = strlen(c); 0126 data = new char[length + 1]; 0127 memcpy(data, c, length + 1); 0128 0129 return *this; 0130 } 0131 0132 CString &CString::operator=(const CString &str) 0133 { 0134 if (this == &str) { 0135 return *this; 0136 } 0137 0138 if (data) { 0139 delete [] data; 0140 } 0141 length = str.length; 0142 if (str.data) { 0143 data = new char[length + 1]; 0144 memcpy(data, str.data, length + 1); 0145 } else { 0146 data = nullptr; 0147 } 0148 0149 return *this; 0150 } 0151 0152 bool operator==(const CString &c1, const CString &c2) 0153 { 0154 size_t len = c1.size(); 0155 return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0); 0156 } 0157 0158 // Hack here to avoid a global with a constructor; point to an unsigned short instead of a UChar. 0159 static unsigned short almostUChar; 0160 UString::Rep UString::Rep::null = { 0, 0, 1, 0, 0, &UString::Rep::null, 0, nullptr, 0, 0, 0, 0 }; 0161 UString::Rep UString::Rep::empty = { 0, 0, 1, 0, 0, &UString::Rep::empty, 0, reinterpret_cast<UChar *>(&almostUChar), 0, 0, 0, 0 }; 0162 const int normalStatBufferSize = 4096; 0163 static char *statBuffer = nullptr; // FIXME: This buffer is never deallocated. 0164 static int statBufferSize = 0; 0165 0166 PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar *d, int length) 0167 { 0168 UChar *copyD = allocChars(length); 0169 memcpy(copyD, d, length * sizeof(UChar)); 0170 0171 return create(copyD, length); 0172 } 0173 0174 PassRefPtr<UString::Rep> UString::Rep::create(UChar *d, int l) 0175 { 0176 Rep *r = new Rep; 0177 r->offset = 0; 0178 r->len = l; 0179 r->rc = 1; 0180 r->_hash = 0; 0181 r->isIdentifier = 0; 0182 r->baseString = r; 0183 r->reportedCost = 0; 0184 r->buf = d; 0185 r->usedCapacity = l; 0186 r->capacity = l; 0187 r->usedPreCapacity = 0; 0188 r->preCapacity = 0; 0189 0190 // steal the single reference this Rep was created with 0191 return adoptRef(r); 0192 } 0193 0194 PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> base, int offset, int length) 0195 { 0196 assert(base); 0197 0198 int baseOffset = base->offset; 0199 0200 base = base->baseString; 0201 0202 assert(-(offset + baseOffset) <= base->usedPreCapacity); 0203 assert(offset + baseOffset + length <= base->usedCapacity); 0204 0205 Rep *r = new Rep; 0206 r->offset = baseOffset + offset; 0207 r->len = length; 0208 r->rc = 1; 0209 r->_hash = 0; 0210 r->isIdentifier = 0; 0211 r->baseString = base.releaseRef(); 0212 r->reportedCost = 0; 0213 r->buf = nullptr; 0214 r->usedCapacity = 0; 0215 r->capacity = 0; 0216 r->usedPreCapacity = 0; 0217 r->preCapacity = 0; 0218 0219 // steal the single reference this Rep was created with 0220 return adoptRef(r); 0221 } 0222 0223 void UString::Rep::destroy() 0224 { 0225 if (isIdentifier) { 0226 Identifier::remove(this); 0227 } 0228 if (baseString != this) { 0229 baseString->deref(); 0230 } else { 0231 fastFree(buf); 0232 } 0233 delete this; 0234 } 0235 0236 // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's 0237 // or anything like that. 0238 const unsigned PHI = 0x9e3779b9U; 0239 0240 // Paul Hsieh's SuperFastHash 0241 // http://www.azillionmonkeys.com/qed/hash.html 0242 unsigned UString::Rep::computeHash(const UChar *s, int len) 0243 { 0244 unsigned l = len; 0245 uint32_t hash = PHI; 0246 uint32_t tmp; 0247 0248 int rem = l & 1; 0249 l >>= 1; 0250 0251 // Main loop 0252 for (; l > 0; l--) { 0253 hash += s[0].uc; 0254 tmp = (s[1].uc << 11) ^ hash; 0255 hash = (hash << 16) ^ tmp; 0256 s += 2; 0257 hash += hash >> 11; 0258 } 0259 0260 // Handle end case 0261 if (rem) { 0262 hash += s[0].uc; 0263 hash ^= hash << 11; 0264 hash += hash >> 17; 0265 } 0266 0267 // Force "avalanching" of final 127 bits 0268 hash ^= hash << 3; 0269 hash += hash >> 5; 0270 hash ^= hash << 2; 0271 hash += hash >> 15; 0272 hash ^= hash << 10; 0273 0274 // this avoids ever returning a hash code of 0, since that is used to 0275 // signal "hash not computed yet", using a value that is likely to be 0276 // effectively the same as 0 when the low bits are masked 0277 if (hash == 0) { 0278 hash = 0x80000000; 0279 } 0280 0281 return hash; 0282 } 0283 0284 // Paul Hsieh's SuperFastHash 0285 // http://www.azillionmonkeys.com/qed/hash.html 0286 unsigned UString::Rep::computeHash(const char *s, int len) 0287 { 0288 // This hash is designed to work on 16-bit chunks at a time. But since the normal case 0289 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they 0290 // were 16-bit chunks, which should give matching results 0291 0292 uint32_t hash = PHI; 0293 uint32_t tmp; 0294 unsigned l = len; 0295 0296 int rem = l & 1; 0297 l >>= 1; 0298 0299 // Main loop 0300 for (; l > 0; l--) { 0301 hash += (unsigned char)s[0]; 0302 tmp = ((unsigned char)s[1] << 11) ^ hash; 0303 hash = (hash << 16) ^ tmp; 0304 s += 2; 0305 hash += hash >> 11; 0306 } 0307 0308 // Handle end case 0309 if (rem) { 0310 hash += (unsigned char)s[0]; 0311 hash ^= hash << 11; 0312 hash += hash >> 17; 0313 } 0314 0315 // Force "avalanching" of final 127 bits 0316 hash ^= hash << 3; 0317 hash += hash >> 5; 0318 hash ^= hash << 2; 0319 hash += hash >> 15; 0320 hash ^= hash << 10; 0321 0322 // this avoids ever returning a hash code of 0, since that is used to 0323 // signal "hash not computed yet", using a value that is likely to be 0324 // effectively the same as 0 when the low bits are masked 0325 if (hash == 0) { 0326 hash = 0x80000000; 0327 } 0328 0329 return hash; 0330 } 0331 0332 unsigned UString::Rep::computeHash(const char *s) 0333 { 0334 return computeHash(s, strlen(s)); 0335 } 0336 0337 // put these early so they can be inlined 0338 inline size_t UString::expandedSize(size_t size, size_t otherSize) const 0339 { 0340 // Do the size calculation in two parts, returning overflowIndicator if 0341 // we overflow the maximum value that we can handle. 0342 0343 if (size > maxUChars()) { 0344 return overflowIndicator(); 0345 } 0346 0347 size_t expandedSize = ((size + 10) / 10 * 11) + 1; 0348 if (maxUChars() - expandedSize < otherSize) { 0349 return overflowIndicator(); 0350 } 0351 0352 return expandedSize + otherSize; 0353 } 0354 0355 inline int UString::usedCapacity() const 0356 { 0357 return m_rep->baseString->usedCapacity; 0358 } 0359 0360 inline int UString::usedPreCapacity() const 0361 { 0362 return m_rep->baseString->usedPreCapacity; 0363 } 0364 0365 void UString::expandCapacity(int requiredLength) 0366 { 0367 Rep *r = m_rep->baseString; 0368 0369 if (requiredLength > r->capacity) { 0370 size_t newCapacity = expandedSize(requiredLength, r->preCapacity); 0371 UChar *oldBuf = r->buf; 0372 r->buf = reallocChars(r->buf, newCapacity); 0373 if (!r->buf) { 0374 r->buf = oldBuf; 0375 m_rep = &Rep::null; 0376 return; 0377 } 0378 r->capacity = newCapacity - r->preCapacity; 0379 } 0380 if (requiredLength > r->usedCapacity) { 0381 r->usedCapacity = requiredLength; 0382 } 0383 } 0384 0385 void UString::expandPreCapacity(int requiredPreCap) 0386 { 0387 Rep *r = m_rep->baseString; 0388 0389 if (requiredPreCap > r->preCapacity) { 0390 size_t newCapacity = expandedSize(requiredPreCap, r->capacity); 0391 int delta = newCapacity - r->capacity - r->preCapacity; 0392 0393 UChar *newBuf = allocChars(newCapacity); 0394 if (!newBuf) { 0395 m_rep = &Rep::null; 0396 return; 0397 } 0398 memcpy(newBuf + delta, r->buf, (r->capacity + r->preCapacity) * sizeof(UChar)); 0399 fastFree(r->buf); 0400 r->buf = newBuf; 0401 0402 r->preCapacity = newCapacity - r->capacity; 0403 } 0404 if (requiredPreCap > r->usedPreCapacity) { 0405 r->usedPreCapacity = requiredPreCap; 0406 } 0407 } 0408 0409 UString::UString(Empty) 0410 : m_rep(&Rep::empty) 0411 { 0412 } 0413 0414 UString::UString(char c) 0415 : m_rep(Rep::create(allocChars(1), 1)) 0416 { 0417 m_rep->buf[0] = static_cast<unsigned char>(c); 0418 } 0419 0420 UString::UString(const char *c) 0421 { 0422 if (!c) { 0423 m_rep = &Rep::null; 0424 return; 0425 } 0426 0427 if (!c[0]) { 0428 m_rep = &Rep::empty; 0429 return; 0430 } 0431 0432 size_t length = strlen(c); 0433 UChar *d = allocChars(length); 0434 if (!d) { 0435 m_rep = &Rep::null; 0436 } else { 0437 for (size_t i = 0; i < length; i++) { 0438 d[i].uc = c[i]; 0439 } 0440 m_rep = Rep::create(d, static_cast<int>(length)); 0441 } 0442 } 0443 0444 UString::UString(const char *c, size_t length) 0445 { 0446 if (!c) { 0447 m_rep = &Rep::null; 0448 return; 0449 } 0450 0451 if (length == 0) { 0452 m_rep = &Rep::empty; 0453 return; 0454 } 0455 0456 UChar *d = allocChars(length); 0457 if (!d) { 0458 m_rep = &Rep::null; 0459 } else { 0460 for (size_t i = 0; i < length; i++) { 0461 d[i].uc = c[i]; 0462 } 0463 m_rep = Rep::create(d, static_cast<int>(length)); 0464 } 0465 } 0466 0467 UString::UString(const UChar *c, int length) 0468 { 0469 if (length == 0) { 0470 m_rep = &Rep::empty; 0471 } else { 0472 m_rep = Rep::createCopying(c, length); 0473 } 0474 } 0475 0476 UString::UString(UChar *c, int length, bool copy) 0477 { 0478 if (length == 0) { 0479 m_rep = &Rep::empty; 0480 } else if (copy) { 0481 m_rep = Rep::createCopying(c, length); 0482 } else { 0483 m_rep = Rep::create(c, length); 0484 } 0485 } 0486 0487 UString::UString(const Vector<UChar> &buffer) 0488 { 0489 if (!buffer.size()) { 0490 m_rep = &Rep::empty; 0491 } else { 0492 m_rep = Rep::createCopying(buffer.data(), buffer.size()); 0493 } 0494 } 0495 0496 UString::UString(const UString &a, const UString &b) 0497 { 0498 int aSize = a.size(); 0499 int aOffset = a.m_rep->offset; 0500 int bSize = b.size(); 0501 int bOffset = b.m_rep->offset; 0502 int length = aSize + bSize; 0503 0504 // possible cases: 0505 0506 if (aSize == 0) { 0507 // a is empty 0508 m_rep = b.m_rep; 0509 } else if (bSize == 0) { 0510 // b is empty 0511 m_rep = a.m_rep; 0512 } else if (aOffset + aSize == a.usedCapacity() && aSize >= minShareSize && 4 * aSize >= bSize && 0513 (-bOffset != b.usedPreCapacity() || aSize >= bSize)) { 0514 // - a reaches the end of its buffer so it qualifies for shared append 0515 // - also, it's at least a quarter the length of b - appending to a much shorter 0516 // string does more harm than good 0517 // - however, if b qualifies for prepend and is longer than a, we'd rather prepend 0518 UString x(a); 0519 x.expandCapacity(aOffset + length); 0520 if (a.data() && x.data()) { 0521 memcpy(const_cast<UChar *>(a.data() + aSize), b.data(), bSize * sizeof(UChar)); 0522 m_rep = Rep::create(a.m_rep, 0, length); 0523 } else { 0524 m_rep = &Rep::null; 0525 } 0526 } else if (-bOffset == b.usedPreCapacity() && bSize >= minShareSize && 4 * bSize >= aSize) { 0527 // - b reaches the beginning of its buffer so it qualifies for shared prepend 0528 // - also, it's at least a quarter the length of a - prepending to a much shorter 0529 // string does more harm than good 0530 UString y(b); 0531 y.expandPreCapacity(-bOffset + aSize); 0532 if (b.data() && y.data()) { 0533 memcpy(const_cast<UChar *>(b.data() - aSize), a.data(), aSize * sizeof(UChar)); 0534 m_rep = Rep::create(b.m_rep, -aSize, length); 0535 } else { 0536 m_rep = &Rep::null; 0537 } 0538 } else { 0539 // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string 0540 size_t newCapacity = expandedSize(length, 0); 0541 UChar *d = allocChars(newCapacity); 0542 if (!d) { 0543 m_rep = &Rep::null; 0544 } else { 0545 memcpy(d, a.data(), aSize * sizeof(UChar)); 0546 memcpy(d + aSize, b.data(), bSize * sizeof(UChar)); 0547 m_rep = Rep::create(d, length); 0548 m_rep->capacity = newCapacity; 0549 } 0550 } 0551 } 0552 0553 const UString &UString::null() 0554 { 0555 static UString *n = new UString; 0556 return *n; 0557 } 0558 0559 UString UString::from(int i) 0560 { 0561 UChar buf[1 + sizeof(i) * 3]; 0562 UChar *end = buf + sizeof(buf) / sizeof(UChar); 0563 UChar *p = end; 0564 0565 if (i == 0) { 0566 *--p = '0'; 0567 } else if (i == INT_MIN) { 0568 char minBuf[1 + sizeof(i) * 3]; 0569 sprintf(minBuf, "%d", INT_MIN); 0570 return UString(minBuf); 0571 } else { 0572 bool negative = false; 0573 if (i < 0) { 0574 negative = true; 0575 i = -i; 0576 } 0577 while (i) { 0578 *--p = (unsigned short)((i % 10) + '0'); 0579 i /= 10; 0580 } 0581 if (negative) { 0582 *--p = '-'; 0583 } 0584 } 0585 0586 return UString(p, static_cast<int>(end - p)); 0587 } 0588 0589 UString UString::from(unsigned int u) 0590 { 0591 UChar buf[sizeof(u) * 3]; 0592 UChar *end = buf + sizeof(buf) / sizeof(UChar); 0593 UChar *p = end; 0594 0595 if (u == 0) { 0596 *--p = '0'; 0597 } else { 0598 while (u) { 0599 *--p = (unsigned short)((u % 10) + '0'); 0600 u /= 10; 0601 } 0602 } 0603 0604 return UString(p, static_cast<int>(end - p)); 0605 } 0606 0607 UString UString::from(long l) 0608 { 0609 UChar buf[1 + sizeof(l) * 3]; 0610 UChar *end = buf + sizeof(buf) / sizeof(UChar); 0611 UChar *p = end; 0612 0613 if (l == 0) { 0614 *--p = '0'; 0615 } else if (l == LONG_MIN) { 0616 char minBuf[1 + sizeof(l) * 3]; 0617 sprintf(minBuf, "%ld", LONG_MIN); 0618 return UString(minBuf); 0619 } else { 0620 bool negative = false; 0621 if (l < 0) { 0622 negative = true; 0623 l = -l; 0624 } 0625 while (l) { 0626 *--p = (unsigned short)((l % 10) + '0'); 0627 l /= 10; 0628 } 0629 if (negative) { 0630 *--p = '-'; 0631 } 0632 } 0633 0634 return UString(p, static_cast<int>(end - p)); 0635 } 0636 0637 UString UString::from(double d) 0638 { 0639 // avoid ever printing -NaN, in JS conceptually there is only one NaN value 0640 if (isNaN(d)) { 0641 return UString("NaN", 3); 0642 } 0643 0644 char buf[80]; 0645 int decimalPoint; 0646 int sign; 0647 0648 char *result = kjs_dtoa(d, 0, 0, &decimalPoint, &sign, nullptr); 0649 int length = static_cast<int>(strlen(result)); 0650 0651 int i = 0; 0652 if (sign) { 0653 buf[i++] = '-'; 0654 } 0655 0656 if (decimalPoint <= 0 && decimalPoint > -6) { 0657 buf[i++] = '0'; 0658 buf[i++] = '.'; 0659 for (int j = decimalPoint; j < 0; j++) { 0660 buf[i++] = '0'; 0661 } 0662 strcpy(buf + i, result); 0663 i += length; 0664 } else if (decimalPoint <= 21 && decimalPoint > 0) { 0665 if (length <= decimalPoint) { 0666 strcpy(buf + i, result); 0667 i += length; 0668 for (int j = 0; j < decimalPoint - length; j++) { 0669 buf[i++] = '0'; 0670 } 0671 // buf[i] = '\0'; 0672 } else { 0673 strncpy(buf + i, result, decimalPoint); 0674 i += decimalPoint; 0675 buf[i++] = '.'; 0676 strcpy(buf + i, result + decimalPoint); 0677 i += length - decimalPoint; 0678 } 0679 } else if (result[0] < '0' || result[0] > '9') { 0680 strcpy(buf + i, result); 0681 i += length; 0682 } else { 0683 buf[i++] = result[0]; 0684 if (length > 1) { 0685 buf[i++] = '.'; 0686 strcpy(buf + i, result + 1); 0687 i += length - 1; 0688 } 0689 0690 buf[i++] = 'e'; 0691 buf[i++] = (decimalPoint >= 0) ? '+' : '-'; 0692 // decimalPoint can't be more than 3 digits decimal given the 0693 // nature of float representation 0694 int exponential = decimalPoint - 1; 0695 if (exponential < 0) { 0696 exponential = exponential * -1; 0697 } 0698 if (exponential >= 100) { 0699 buf[i++] = '0' + exponential / 100; 0700 } 0701 if (exponential >= 10) { 0702 buf[i++] = '0' + (exponential % 100) / 10; 0703 } 0704 buf[i++] = '0' + exponential % 10; 0705 // buf[i++] = '\0'; 0706 } 0707 0708 kjs_freedtoa(result); 0709 0710 return UString(buf, i); 0711 } 0712 0713 UString UString::spliceSubstringsWithSeparators(const Range *substringRanges, int rangeCount, const UString *separators, int separatorCount) const 0714 { 0715 if (rangeCount == 1 && separatorCount == 0) { 0716 int thisSize = size(); 0717 int position = substringRanges[0].position; 0718 int length = substringRanges[0].length; 0719 if (position <= 0 && length >= thisSize) { 0720 return *this; 0721 } 0722 return UString::Rep::create(m_rep, maxInt(0, position), minInt(thisSize, length)); 0723 } 0724 0725 int totalLength = 0; 0726 for (int i = 0; i < rangeCount; i++) { 0727 totalLength += substringRanges[i].length; 0728 } 0729 for (int i = 0; i < separatorCount; i++) { 0730 totalLength += separators[i].size(); 0731 } 0732 0733 if (totalLength == 0) { 0734 return ""; 0735 } 0736 0737 UChar *buffer = allocChars(totalLength); 0738 if (!buffer) { 0739 return null(); 0740 } 0741 0742 int maxCount = max(rangeCount, separatorCount); 0743 int bufferPos = 0; 0744 for (int i = 0; i < maxCount; i++) { 0745 if (i < rangeCount) { 0746 memcpy(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length * sizeof(UChar)); 0747 bufferPos += substringRanges[i].length; 0748 } 0749 if (i < separatorCount) { 0750 memcpy(buffer + bufferPos, separators[i].data(), separators[i].size() * sizeof(UChar)); 0751 bufferPos += separators[i].size(); 0752 } 0753 } 0754 0755 return UString::Rep::create(buffer, totalLength); 0756 } 0757 0758 // Append a sub-string of <subStr> to this string. 0759 // Equivalent to append(subStr.substr(subPos, subLength)) 0760 0761 UString &UString::append(const UString &subStr, int subPos, int subLength) 0762 { 0763 int subSize = subStr.size(); 0764 0765 if (subPos < 0) { 0766 subPos = 0; 0767 } else if (subPos >= subSize) { 0768 subPos = subSize; 0769 } 0770 if (subLength < 0) { 0771 subLength = subSize; 0772 } 0773 if (subPos + subLength >= subSize) { 0774 subLength = subSize - subPos; 0775 } 0776 0777 return append(UString(subStr.data() + subPos, subLength)); 0778 } 0779 0780 UString &UString::append(const UString &t) 0781 { 0782 int thisSize = size(); 0783 int thisOffset = m_rep->offset; 0784 int tSize = t.size(); 0785 int length = thisSize + tSize; 0786 0787 // possible cases: 0788 if (thisSize == 0) { 0789 // this is empty 0790 *this = t; 0791 } else if (tSize == 0) { 0792 // t is empty 0793 } else if (m_rep->baseIsSelf() && m_rep->rc == 1) { 0794 // this is direct and has refcount of 1 (so we can just alter it directly) 0795 expandCapacity(thisOffset + length); 0796 if (data()) { 0797 memcpy(const_cast<UChar *>(data() + thisSize), t.data(), tSize * sizeof(UChar)); 0798 m_rep->len = length; 0799 m_rep->_hash = 0; 0800 } 0801 } else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) { 0802 // this reaches the end of the buffer - extend it if it's long enough to append to 0803 expandCapacity(thisOffset + length); 0804 if (data()) { 0805 memcpy(const_cast<UChar *>(data() + thisSize), t.data(), tSize * sizeof(UChar)); 0806 m_rep = Rep::create(m_rep, 0, length); 0807 } 0808 } else { 0809 // this is shared with someone using more capacity, gotta make a whole new string 0810 size_t newCapacity = expandedSize(length, 0); 0811 UChar *d = allocChars(newCapacity); 0812 if (!d) { 0813 m_rep = &Rep::null; 0814 } else { 0815 memcpy(d, data(), thisSize * sizeof(UChar)); 0816 memcpy(const_cast<UChar *>(d + thisSize), t.data(), tSize * sizeof(UChar)); 0817 m_rep = Rep::create(d, length); 0818 m_rep->capacity = newCapacity; 0819 } 0820 } 0821 0822 return *this; 0823 } 0824 0825 UString &UString::append(const char *t) 0826 { 0827 int thisSize = size(); 0828 int thisOffset = m_rep->offset; 0829 int tSize = static_cast<int>(strlen(t)); 0830 int length = thisSize + tSize; 0831 0832 // possible cases: 0833 if (thisSize == 0) { 0834 // this is empty 0835 *this = t; 0836 } else if (tSize == 0) { 0837 // t is empty, we'll just return *this below. 0838 } else if (m_rep->baseIsSelf() && m_rep->rc == 1) { 0839 // this is direct and has refcount of 1 (so we can just alter it directly) 0840 expandCapacity(thisOffset + length); 0841 UChar *d = const_cast<UChar *>(data()); 0842 if (d) { 0843 for (int i = 0; i < tSize; ++i) { 0844 d[thisSize + i] = t[i]; 0845 } 0846 m_rep->len = length; 0847 m_rep->_hash = 0; 0848 } 0849 } else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) { 0850 // this string reaches the end of the buffer - extend it 0851 expandCapacity(thisOffset + length); 0852 UChar *d = const_cast<UChar *>(data()); 0853 if (d) { 0854 for (int i = 0; i < tSize; ++i) { 0855 d[thisSize + i] = t[i]; 0856 } 0857 m_rep = Rep::create(m_rep, 0, length); 0858 } 0859 } else { 0860 // this is shared with someone using more capacity, gotta make a whole new string 0861 size_t newCapacity = expandedSize(length, 0); 0862 UChar *d = allocChars(newCapacity); 0863 if (!d) { 0864 m_rep = &Rep::null; 0865 } else { 0866 memcpy(d, data(), thisSize * sizeof(UChar)); 0867 for (int i = 0; i < tSize; ++i) { 0868 d[thisSize + i] = t[i]; 0869 } 0870 m_rep = Rep::create(d, length); 0871 m_rep->capacity = newCapacity; 0872 } 0873 } 0874 0875 return *this; 0876 } 0877 0878 UString &UString::append(unsigned short c) 0879 { 0880 int thisOffset = m_rep->offset; 0881 int length = size(); 0882 0883 // possible cases: 0884 if (length == 0) { 0885 // this is empty - must make a new m_rep because we don't want to pollute the shared empty one 0886 size_t newCapacity = expandedSize(1, 0); 0887 UChar *d = allocChars(newCapacity); 0888 if (!d) { 0889 m_rep = &Rep::null; 0890 } else { 0891 d[0] = c; 0892 m_rep = Rep::create(d, 1); 0893 m_rep->capacity = newCapacity; 0894 } 0895 } else if (m_rep->baseIsSelf() && m_rep->rc == 1) { 0896 // this is direct and has refcount of 1 (so we can just alter it directly) 0897 expandCapacity(thisOffset + length + 1); 0898 UChar *d = const_cast<UChar *>(data()); 0899 if (d) { 0900 d[length] = c; 0901 m_rep->len = length + 1; 0902 m_rep->_hash = 0; 0903 } 0904 } else if (thisOffset + length == usedCapacity() && length >= minShareSize) { 0905 // this reaches the end of the string - extend it and share 0906 expandCapacity(thisOffset + length + 1); 0907 UChar *d = const_cast<UChar *>(data()); 0908 if (d) { 0909 d[length] = c; 0910 m_rep = Rep::create(m_rep, 0, length + 1); 0911 } 0912 } else { 0913 // this is shared with someone using more capacity, gotta make a whole new string 0914 size_t newCapacity = expandedSize(length + 1, 0); 0915 UChar *d = allocChars(newCapacity); 0916 if (!d) { 0917 m_rep = &Rep::null; 0918 } else { 0919 memcpy(d, data(), length * sizeof(UChar)); 0920 d[length] = c; 0921 m_rep = Rep::create(d, length + 1); 0922 m_rep->capacity = newCapacity; 0923 } 0924 } 0925 0926 return *this; 0927 } 0928 0929 CString UString::cstring() const 0930 { 0931 return ascii(); 0932 } 0933 0934 char *UString::ascii() const 0935 { 0936 // Never make the buffer smaller than normalStatBufferSize. 0937 // Thus we almost never need to reallocate. 0938 int length = size(); 0939 int neededSize = length + 1; 0940 if (neededSize < normalStatBufferSize) { 0941 neededSize = normalStatBufferSize; 0942 } 0943 if (neededSize != statBufferSize) { 0944 delete [] statBuffer; 0945 statBuffer = new char [neededSize]; 0946 statBufferSize = neededSize; 0947 } 0948 0949 const UChar *p = data(); 0950 char *q = statBuffer; 0951 const UChar *limit = p + length; 0952 while (p != limit) { 0953 *q = static_cast<char>(p->uc); 0954 ++p; 0955 ++q; 0956 } 0957 *q = '\0'; 0958 0959 return statBuffer; 0960 } 0961 0962 UString &UString::operator=(Empty) 0963 { 0964 m_rep = &Rep::empty; 0965 0966 return *this; 0967 } 0968 0969 UString &UString::operator=(const char *c) 0970 { 0971 set(c, c ? strlen(c) : 0); 0972 0973 return *this; 0974 } 0975 0976 void UString::set(const char *c, int l) 0977 { 0978 if (!c) { 0979 m_rep = &Rep::null; 0980 return; 0981 } 0982 0983 if (l == 0) { 0984 m_rep = &Rep::empty; 0985 return; 0986 } 0987 0988 UChar *d; 0989 if (m_rep->rc == 1 && l <= m_rep->capacity && m_rep->baseIsSelf() && m_rep->offset == 0 && m_rep->preCapacity == 0) { 0990 d = m_rep->buf; 0991 m_rep->_hash = 0; 0992 m_rep->len = l; 0993 } else { 0994 d = allocChars(l); 0995 if (!d) { 0996 m_rep = &Rep::null; 0997 return; 0998 } 0999 m_rep = Rep::create(d, l); 1000 } 1001 for (int i = 0; i < l; i++) { 1002 d[i].uc = static_cast<unsigned char>(c[i]); 1003 } 1004 } 1005 1006 bool UString::is8Bit() const 1007 { 1008 const UChar *u = data(); 1009 const UChar *limit = u + size(); 1010 while (u < limit) { 1011 if (u->uc > 0xFF) { 1012 return false; 1013 } 1014 ++u; 1015 } 1016 1017 return true; 1018 } 1019 1020 const UChar UString::operator[](int pos) const 1021 { 1022 if (pos >= size()) { 1023 return '\0'; 1024 } 1025 return data()[pos]; 1026 } 1027 1028 double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const 1029 { 1030 double d; 1031 1032 const int length = size(); 1033 int leadingSpaces = 0; 1034 1035 // skip leading white space 1036 while (leadingSpaces < length && CommonUnicode::isStrWhiteSpace(data()[leadingSpaces].uc)) { 1037 ++leadingSpaces; 1038 } 1039 1040 UString whitespaceSkipped = substr(leadingSpaces, length - leadingSpaces); 1041 1042 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk 1043 // after the number, so is8Bit is too strict a check. 1044 if (!whitespaceSkipped.is8Bit()) { 1045 return NaN; 1046 } 1047 1048 const char *c = whitespaceSkipped.ascii(); 1049 1050 // empty string ? 1051 if (*c == '\0') { 1052 return tolerateEmptyString ? 0.0 : NaN; 1053 } 1054 1055 // hex number ? 1056 if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) { 1057 const char *firstDigitPosition = c + 2; 1058 c++; 1059 d = 0.0; 1060 while (*(++c)) { 1061 if (*c >= '0' && *c <= '9') { 1062 d = d * 16.0 + *c - '0'; 1063 } else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f')) { 1064 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0; 1065 } else { 1066 break; 1067 } 1068 } 1069 1070 if (d >= mantissaOverflowLowerBound) { 1071 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16); 1072 } 1073 } else { 1074 // regular number ? 1075 char *end; 1076 d = kjs_strtod(c, &end); 1077 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) { 1078 c = end; 1079 } else { 1080 double sign = 1.0; 1081 1082 if (*c == '+') { 1083 c++; 1084 } else if (*c == '-') { 1085 sign = -1.0; 1086 c++; 1087 } 1088 1089 // We used strtod() to do the conversion. However, strtod() handles 1090 // infinite values slightly differently than JavaScript in that it 1091 // converts the string "inf" with any capitalization to infinity, 1092 // whereas the ECMA spec requires that it be converted to NaN. 1093 1094 if (strncmp(c, "Infinity", 8) == 0) { 1095 d = sign * Inf; 1096 c += 8; 1097 } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i') { 1098 c = end; 1099 } else { 1100 return NaN; 1101 } 1102 } 1103 } 1104 1105 // allow trailing white space 1106 while (isASCIISpace(*c)) { 1107 c++; 1108 } 1109 // don't allow anything after - unless tolerant=true 1110 if (!tolerateTrailingJunk && *c != '\0') { 1111 d = NaN; 1112 } 1113 1114 return d; 1115 } 1116 1117 #ifdef __FAST_MATH__ 1118 # error "KJS does not work correctly with -ffast-math" 1119 #endif 1120 1121 double UString::toDouble(bool tolerateTrailingJunk) const 1122 { 1123 return toDouble(tolerateTrailingJunk, true); 1124 } 1125 1126 double UString::toDouble() const 1127 { 1128 return toDouble(false, true); 1129 } 1130 1131 uint32_t UString::toStrictUInt32(bool *ok) const 1132 { 1133 if (ok) { 1134 *ok = false; 1135 } 1136 1137 // Empty string is not OK. 1138 int len = m_rep->len; 1139 if (len == 0) { 1140 return 0; 1141 } 1142 const UChar *p = m_rep->data(); 1143 unsigned short c = p->unicode(); 1144 1145 // If the first digit is 0, only 0 itself is OK. 1146 if (c == '0') { 1147 if (len == 1 && ok) { 1148 *ok = true; 1149 } 1150 return 0; 1151 } 1152 1153 // Convert to UInt32, checking for overflow. 1154 uint32_t i = 0; 1155 while (1) { 1156 // Process character, turning it into a digit. 1157 if (c < '0' || c > '9') { 1158 return 0; 1159 } 1160 const unsigned d = c - '0'; 1161 1162 // Multiply by 10, checking for overflow out of 32 bits. 1163 if (i > 0xFFFFFFFFU / 10) { 1164 return 0; 1165 } 1166 i *= 10; 1167 1168 // Add in the digit, checking for overflow out of 32 bits. 1169 const unsigned max = 0xFFFFFFFFU - d; 1170 if (i > max) { 1171 return 0; 1172 } 1173 i += d; 1174 1175 // Handle end of string. 1176 if (--len == 0) { 1177 if (ok) { 1178 *ok = true; 1179 } 1180 return i; 1181 } 1182 1183 // Get next character. 1184 c = (++p)->unicode(); 1185 } 1186 } 1187 1188 int UString::find(const UString &f, int pos) const 1189 { 1190 int sz = size(); 1191 int fsz = f.size(); 1192 if (sz < fsz) { 1193 return -1; 1194 } 1195 if (pos < 0) { 1196 pos = 0; 1197 } 1198 if (fsz == 0) { 1199 return pos; 1200 } 1201 const UChar *data_ = data(); 1202 const UChar *end = data_ + sz - fsz; 1203 int fsizeminusone = (fsz - 1) * sizeof(UChar); 1204 const UChar *fdata = f.data(); 1205 unsigned short fchar = fdata->uc; 1206 ++fdata; 1207 for (const UChar *c = data_ + pos; c <= end; c++) 1208 if (c->uc == fchar && !memcmp(c + 1, fdata, fsizeminusone)) { 1209 return (c - data_); 1210 } 1211 1212 return -1; 1213 } 1214 1215 int UString::find(UChar ch, int pos) const 1216 { 1217 if (pos < 0) { 1218 pos = 0; 1219 } 1220 const UChar *data_ = data(); 1221 const UChar *end = data_ + size(); 1222 for (const UChar *c = data_ + pos; c < end; c++) 1223 if (*c == ch) { 1224 return (c - data_); 1225 } 1226 1227 return -1; 1228 } 1229 1230 int UString::rfind(const UString &f, int pos) const 1231 { 1232 int sz = size(); 1233 int fsz = f.size(); 1234 if (sz < fsz) { 1235 return -1; 1236 } 1237 if (pos < 0) { 1238 pos = 0; 1239 } 1240 if (pos > sz - fsz) { 1241 pos = sz - fsz; 1242 } 1243 if (fsz == 0) { 1244 return pos; 1245 } 1246 int fsizeminusone = (fsz - 1) * sizeof(UChar); 1247 const UChar *fdata = f.data(); 1248 const UChar *data_ = data(); 1249 for (const UChar *c = data_ + pos; c >= data_; c--) { 1250 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone)) { 1251 return (c - data_); 1252 } 1253 } 1254 1255 return -1; 1256 } 1257 1258 int UString::rfind(UChar ch, int pos) const 1259 { 1260 if (isEmpty()) { 1261 return -1; 1262 } 1263 if (pos + 1 >= size()) { 1264 pos = size() - 1; 1265 } 1266 const UChar *data_ = data(); 1267 for (const UChar *c = data_ + pos; c >= data_; c--) { 1268 if (*c == ch) { 1269 return (c - data_); 1270 } 1271 } 1272 1273 return -1; 1274 } 1275 1276 UString UString::substr(int pos, int len) const 1277 { 1278 int s = size(); 1279 1280 if (pos < 0) { 1281 pos = 0; 1282 } else if (pos >= s) { 1283 pos = s; 1284 } 1285 if (len < 0) { 1286 len = s; 1287 } 1288 if (pos + len >= s) { 1289 len = s - pos; 1290 } 1291 1292 if (pos == 0 && len == s) { 1293 return *this; 1294 } 1295 1296 return UString(Rep::create(m_rep, pos, len)); 1297 } 1298 1299 size_t UString::maxUChars() { 1300 return ::KJS::maxUChars(); 1301 } 1302 1303 void UString::copyForWriting() 1304 { 1305 int l = size(); 1306 if (!l) { 1307 return; // Not going to touch anything anyway. 1308 } 1309 if (m_rep->rc > 1 || !m_rep->baseIsSelf()) { 1310 UChar *n = allocChars(l); 1311 memcpy(n, data(), l * sizeof(UChar)); 1312 m_rep = Rep::create(n, l); 1313 } 1314 } 1315 1316 bool operator==(const UString &s1, const UString &s2) 1317 { 1318 #if 0 1319 if (s1.m_rep == s2.m_rep) { 1320 return true; 1321 } 1322 #endif 1323 1324 if (s1.m_rep->len != s2.m_rep->len) { 1325 return false; 1326 } 1327 1328 return (memcmp(s1.m_rep->data(), s2.m_rep->data(), 1329 s1.m_rep->len * sizeof(UChar)) == 0); 1330 } 1331 1332 bool operator==(const UString &s1, const char *s2) 1333 { 1334 if (s2 == nullptr) { 1335 return s1.isEmpty(); 1336 } 1337 1338 const UChar *u = s1.data(); 1339 const UChar *uend = u + s1.size(); 1340 while (u != uend && *s2) { 1341 if (u->uc != (unsigned char)*s2) { 1342 return false; 1343 } 1344 s2++; 1345 u++; 1346 } 1347 1348 return u == uend && *s2 == 0; 1349 } 1350 1351 bool operator<(const UString &s1, const UString &s2) 1352 { 1353 const int l1 = s1.size(); 1354 const int l2 = s2.size(); 1355 const int lmin = l1 < l2 ? l1 : l2; 1356 const UChar *c1 = s1.data(); 1357 const UChar *c2 = s2.data(); 1358 int l = 0; 1359 while (l < lmin && *c1 == *c2) { 1360 c1++; 1361 c2++; 1362 l++; 1363 } 1364 if (l < lmin) { 1365 return (c1->uc < c2->uc); 1366 } 1367 1368 return (l1 < l2); 1369 } 1370 1371 bool UString::equal(const UString::Rep *r, const UString::Rep *b) 1372 { 1373 if (r == b) { 1374 return true; 1375 } 1376 1377 int length = r->len; 1378 if (length != b->len) { 1379 return false; 1380 } 1381 1382 const UChar *d = r->data(); 1383 const UChar *s = b->data(); 1384 for (int i = 0; i != length; ++i) 1385 if (d[i].uc != s[i].uc) { 1386 return false; 1387 } 1388 return true; 1389 } 1390 1391 int compare(const UString &s1, const UString &s2) 1392 { 1393 const int l1 = s1.size(); 1394 const int l2 = s2.size(); 1395 const int lmin = l1 < l2 ? l1 : l2; 1396 const UChar *c1 = s1.data(); 1397 const UChar *c2 = s2.data(); 1398 int l = 0; 1399 while (l < lmin && *c1 == *c2) { 1400 c1++; 1401 c2++; 1402 l++; 1403 } 1404 1405 if (l < lmin) { 1406 return (c1->uc > c2->uc) ? 1 : -1; 1407 } 1408 1409 if (l1 == l2) { 1410 return 0; 1411 } 1412 1413 return (l1 > l2) ? 1 : -1; 1414 } 1415 1416 inline int inlineUTF8SequenceLengthNonASCII(char b0) 1417 { 1418 if ((b0 & 0xC0) != 0xC0) { 1419 return 0; 1420 } 1421 if ((b0 & 0xE0) == 0xC0) { 1422 return 2; 1423 } 1424 if ((b0 & 0xF0) == 0xE0) { 1425 return 3; 1426 } 1427 if ((b0 & 0xF8) == 0xF0) { 1428 return 4; 1429 } 1430 return 0; 1431 } 1432 1433 int UTF8SequenceLengthNonASCII(char b0) 1434 { 1435 return inlineUTF8SequenceLengthNonASCII(b0); 1436 } 1437 1438 inline int inlineUTF8SequenceLength(char b0) 1439 { 1440 return (b0 & 0x80) == 0 ? 1 : UTF8SequenceLengthNonASCII(b0); 1441 } 1442 1443 // Given a first byte, gives the length of the UTF-8 sequence it begins. 1444 // Returns 0 for bytes that are not legal starts of UTF-8 sequences. 1445 // Only allows sequences of up to 4 bytes, since that works for all Unicode characters (U-00000000 to U-0010FFFF). 1446 int UTF8SequenceLength(char b0) 1447 { 1448 return (b0 & 0x80) == 0 ? 1 : inlineUTF8SequenceLengthNonASCII(b0); 1449 } 1450 1451 // Takes a null-terminated C-style string with a UTF-8 sequence in it and converts it to a character. 1452 // Only allows Unicode characters (U-00000000 to U-0010FFFF). 1453 // Returns -1 if the sequence is not valid (including presence of extra bytes). 1454 int decodeUTF8Sequence(const char *sequence) 1455 { 1456 // Handle 0-byte sequences (never valid). 1457 const unsigned char b0 = sequence[0]; 1458 const int length = inlineUTF8SequenceLength(b0); 1459 if (length == 0) { 1460 return -1; 1461 } 1462 1463 // Handle 1-byte sequences (plain ASCII). 1464 const unsigned char b1 = sequence[1]; 1465 if (length == 1) { 1466 if (b1) { 1467 return -1; 1468 } 1469 return b0; 1470 } 1471 1472 // Handle 2-byte sequences. 1473 if ((b1 & 0xC0) != 0x80) { 1474 return -1; 1475 } 1476 const unsigned char b2 = sequence[2]; 1477 if (length == 2) { 1478 if (b2) { 1479 return -1; 1480 } 1481 const int c = ((b0 & 0x1F) << 6) | (b1 & 0x3F); 1482 if (c < 0x80) { 1483 return -1; 1484 } 1485 return c; 1486 } 1487 1488 // Handle 3-byte sequences. 1489 if ((b2 & 0xC0) != 0x80) { 1490 return -1; 1491 } 1492 const unsigned char b3 = sequence[3]; 1493 if (length == 3) { 1494 if (b3) { 1495 return -1; 1496 } 1497 const int c = ((b0 & 0xF) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F); 1498 if (c < 0x800) { 1499 return -1; 1500 } 1501 // UTF-16 surrogates should never appear in UTF-8 data. 1502 if (c >= 0xD800 && c <= 0xDFFF) { 1503 return -1; 1504 } 1505 // Backwards BOM and U+FFFF should never appear in UTF-8 data. 1506 if (c == 0xFFFE || c == 0xFFFF) { 1507 return -1; 1508 } 1509 return c; 1510 } 1511 1512 // Handle 4-byte sequences. 1513 if ((b3 & 0xC0) != 0x80) { 1514 return -1; 1515 } 1516 const unsigned char b4 = sequence[4]; 1517 if (length == 4) { 1518 if (b4) { 1519 return -1; 1520 } 1521 const int c = ((b0 & 0x7) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F); 1522 if (c < 0x10000 || c > 0x10FFFF) { 1523 return -1; 1524 } 1525 return c; 1526 } 1527 1528 return -1; 1529 } 1530 1531 CString UString::UTF8String() const 1532 { 1533 // Allocate a buffer big enough to hold all the characters. 1534 const int length = size(); 1535 Vector<char, 1024> buffer(length * 3); 1536 1537 // Convert to runs of 8-bit characters. 1538 char *p = buffer.begin(); 1539 const unsigned short *d = &data()->uc; 1540 for (int i = 0; i != length; ++i) { 1541 unsigned int c = d[i], sc; 1542 if (c < 0x80) { 1543 *p++ = (char)c; 1544 } else if (c < 0x800) { 1545 *p++ = (char)((c >> 6) | 0xC0); // C0 is the 2-byte flag for UTF-8 1546 *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set 1547 } else if (c >= 0xD800 && c <= 0xDBFF && (i + 1) < length && 1548 (sc = d[i + 1]) >= 0xDC00 && sc <= 0xDFFF) { 1549 sc = 0x10000 + (((c & 0x3FF) << 10) | (sc & 0x3FF)); 1550 *p++ = (char)((sc >> 18) | 0xF0); // F0 is the 4-byte flag for UTF-8 1551 *p++ = (char)(((sc >> 12) | 0x80) & 0xBF); // next 6 bits, with high bit set 1552 *p++ = (char)(((sc >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set 1553 *p++ = (char)((sc | 0x80) & 0xBF); // next 6 bits, with high bit set 1554 ++i; 1555 } else { 1556 *p++ = (char)((c >> 12) | 0xE0); // E0 is the 3-byte flag for UTF-8 1557 *p++ = (char)(((c >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set 1558 *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set 1559 } 1560 } 1561 1562 // Return the result as a C string. 1563 CString result(buffer.data(), p - buffer.data()); 1564 1565 return result; 1566 } 1567 1568 } // namespace KJS