src/kjs/ustring.cpp

0001 /*
0002  *  This file is part of the KDE libraries
0003  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
0004  *  Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
0005  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
0006  *
0007  *  This library is free software; you can redistribute it and/or
0008  *  modify it under the terms of the GNU Library General Public
0009  *  License as published by the Free Software Foundation; either
0010  *  version 2 of the License, or (at your option) any later version.
0011  *
0012  *  This library is distributed in the hope that it will be useful,
0013  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
0014  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0015  *  Library General Public License for more details.
0016  *
0017  *  You should have received a copy of the GNU Library General Public License
0018  *  along with this library; see the file COPYING.LIB.  If not, write to
0019  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0020  *  Boston, MA 02110-1301, USA.
0021  *
0022  */
0023
0024 #include "ustring.h"
0025
0026 #include <stdlib.h>
0027 #include <stdio.h>
0028 #include "wtf/DisallowCType.h"
0029 #include "wtf/ASCIICType.h"
0030 #if HAVE_STRINGS_H
0031 #include <strings.h>
0032 #endif
0033 #include <limits.h>
0034
0035 #include "operations.h"
0036 #include "function.h"
0037 #include "identifier.h"
0038 #include <math.h>
0039 #include "dtoa.h"
0040 #include "commonunicode.h"
0041
0042
0043 using std::max;
0044
0045 // GCC cstring uses these automatically, but not all implementations do.
0046 using std::strlen;
0047 using std::strcpy;
0048 using std::strncpy;
0049 using std::memset;
0050 using std::memcpy;
0051
0052 using namespace WTF;
0053
0054 namespace KJS
0055 {
0056
0057 extern const double NaN;
0058 extern const double Inf;
0059
// Sentinel used by the size computations in this file to report that a
// requested size cannot be handled: the largest value a size_t can hold.
static inline size_t overflowIndicator()
{
    const size_t sentinel = std::numeric_limits<size_t>::max();
    return sentinel;
}
// Upper bound on the number of UChars a single buffer may hold.
// The cap is deliberately modest: oversized strings make out-of-memory
// situations painful, and because lengths are 32-bit even on 64-bit
// platforms, a small limit keeps the size arithmetic from overflowing.
static inline size_t maxUChars()
{
    const size_t limit = 0xFFFFFFF;
    return limit;
}
0070
0071 static inline UChar *allocChars(size_t length)
0072 {
0073     assert(length);
0074     if (length > maxUChars()) {
0075         return nullptr;
0076     }
0077     return static_cast<UChar *>(fastMalloc(sizeof(UChar) * length));
0078 }
0079
0080 static inline UChar *reallocChars(UChar *buffer, size_t length)
0081 {
0082     ASSERT(length);
0083     if (length > maxUChars()) {
0084         return nullptr;
0085     }
0086     return static_cast<UChar *>(fastRealloc(buffer, sizeof(UChar) * length));
0087 }
0088
0089 CString::CString(const char *c)
0090 {
0091     length = strlen(c);
0092     data = new char[length + 1];
0093     memcpy(data, c, length + 1);
0094 }
0095
0096 CString::CString(const char *c, size_t len)
0097 {
0098     length = len;
0099     data = new char[len + 1];
0100     memcpy(data, c, len);
0101     data[len] = 0;
0102 }
0103
0104 CString::CString(const CString &b)
0105 {
0106     length = b.length;
0107     if (length > 0 && b.data) {
0108         data = new char[length + 1];
0109         memcpy(data, b.data, length + 1);
0110     } else {
0111         data = nullptr;
0112     }
0113 }
0114
0115 CString::~CString()
0116 {
0117     delete [] data;
0118 }
0119
0120 CString &CString::operator=(const char *c)
0121 {
0122     if (data) {
0123         delete [] data;
0124     }
0125     length = strlen(c);
0126     data = new char[length + 1];
0127     memcpy(data, c, length + 1);
0128
0129     return *this;
0130 }
0131
0132 CString &CString::operator=(const CString &str)
0133 {
0134     if (this == &str) {
0135         return *this;
0136     }
0137
0138     if (data) {
0139         delete [] data;
0140     }
0141     length = str.length;
0142     if (str.data) {
0143         data = new char[length + 1];
0144         memcpy(data, str.data, length + 1);
0145     } else {
0146         data = nullptr;
0147     }
0148
0149     return *this;
0150 }
0151
0152 bool operator==(const CString &c1, const CString &c2)
0153 {
0154     size_t len = c1.size();
0155     return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
0156 }
0157
// Hack here to avoid a global with a constructor; point to an unsigned short instead of a UChar.
// almostUChar is the storage that Rep::empty's initializer points at (via reinterpret_cast).
static unsigned short almostUChar;
// Statically initialized shared reps. Each one names itself in its initializer list
// (&UString::Rep::null / &UString::Rep::empty); Rep::null carries a nullptr where
// Rep::empty carries the address of almostUChar.
// NOTE(review): the meaning of the individual initializer positions depends on the
// member order of Rep declared in ustring.h — confirm there before editing.
UString::Rep UString::Rep::null = { 0, 0, 1, 0, 0, &UString::Rep::null, 0, nullptr, 0, 0, 0, 0 };
UString::Rep UString::Rep::empty = { 0, 0, 1, 0, 0, &UString::Rep::empty, 0, reinterpret_cast<UChar *>(&almostUChar), 0, 0, 0, 0 };
// Minimum size of the scratch buffer used by UString::ascii().
const int normalStatBufferSize = 4096;
// Scratch buffer shared by every UString::ascii() call, plus its current size.
static char *statBuffer = nullptr; // FIXME: This buffer is never deallocated.
static int statBufferSize = 0;
0165
0166 PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar *d, int length)
0167 {
0168     UChar *copyD = allocChars(length);
0169     memcpy(copyD, d, length * sizeof(UChar));
0170
0171     return create(copyD, length);
0172 }
0173
0174 PassRefPtr<UString::Rep> UString::Rep::create(UChar *d, int l)
0175 {
0176     Rep *r = new Rep;
0177     r->offset = 0;
0178     r->len = l;
0179     r->rc = 1;
0180     r->_hash = 0;
0181     r->isIdentifier = 0;
0182     r->baseString = r;
0183     r->reportedCost = 0;
0184     r->buf = d;
0185     r->usedCapacity = l;
0186     r->capacity = l;
0187     r->usedPreCapacity = 0;
0188     r->preCapacity = 0;
0189
0190     // steal the single reference this Rep was created with
0191     return adoptRef(r);
0192 }
0193
0194 PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> base, int offset, int length)
0195 {
0196     assert(base);
0197
0198     int baseOffset = base->offset;
0199
0200     base = base->baseString;
0201
0202     assert(-(offset + baseOffset) <= base->usedPreCapacity);
0203     assert(offset + baseOffset + length <= base->usedCapacity);
0204
0205     Rep *r = new Rep;
0206     r->offset = baseOffset + offset;
0207     r->len = length;
0208     r->rc = 1;
0209     r->_hash = 0;
0210     r->isIdentifier = 0;
0211     r->baseString = base.releaseRef();
0212     r->reportedCost = 0;
0213     r->buf = nullptr;
0214     r->usedCapacity = 0;
0215     r->capacity = 0;
0216     r->usedPreCapacity = 0;
0217     r->preCapacity = 0;
0218
0219     // steal the single reference this Rep was created with
0220     return adoptRef(r);
0221 }
0222
0223 void UString::Rep::destroy()
0224 {
0225     if (isIdentifier) {
0226         Identifier::remove(this);
0227     }
0228     if (baseString != this) {
0229         baseString->deref();
0230     } else {
0231         fastFree(buf);
0232     }
0233     delete this;
0234 }
0235
// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
// or anything like that. Used as the initial hash value by the
// UString::Rep::computeHash overloads below.
const unsigned PHI = 0x9e3779b9U;
0239
0240 // Paul Hsieh's SuperFastHash
0241 // http://www.azillionmonkeys.com/qed/hash.html
0242 unsigned UString::Rep::computeHash(const UChar *s, int len)
0243 {
0244     unsigned l = len;
0245     uint32_t hash = PHI;
0246     uint32_t tmp;
0247
0248     int rem = l & 1;
0249     l >>= 1;
0250
0251     // Main loop
0252     for (; l > 0; l--) {
0253         hash += s[0].uc;
0254         tmp = (s[1].uc << 11) ^ hash;
0255         hash = (hash << 16) ^ tmp;
0256         s += 2;
0257         hash += hash >> 11;
0258     }
0259
0260     // Handle end case
0261     if (rem) {
0262         hash += s[0].uc;
0263         hash ^= hash << 11;
0264         hash += hash >> 17;
0265     }
0266
0267     // Force "avalanching" of final 127 bits
0268     hash ^= hash << 3;
0269     hash += hash >> 5;
0270     hash ^= hash << 2;
0271     hash += hash >> 15;
0272     hash ^= hash << 10;
0273
0274     // this avoids ever returning a hash code of 0, since that is used to
0275     // signal "hash not computed yet", using a value that is likely to be
0276     // effectively the same as 0 when the low bits are masked
0277     if (hash == 0) {
0278         hash = 0x80000000;
0279     }
0280
0281     return hash;
0282 }
0283
0284 // Paul Hsieh's SuperFastHash
0285 // http://www.azillionmonkeys.com/qed/hash.html
0286 unsigned UString::Rep::computeHash(const char *s, int len)
0287 {
0288     // This hash is designed to work on 16-bit chunks at a time. But since the normal case
0289     // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
0290     // were 16-bit chunks, which should give matching results
0291
0292     uint32_t hash = PHI;
0293     uint32_t tmp;
0294     unsigned l = len;
0295
0296     int rem = l & 1;
0297     l >>= 1;
0298
0299     // Main loop
0300     for (; l > 0; l--) {
0301         hash += (unsigned char)s[0];
0302         tmp = ((unsigned char)s[1] << 11) ^ hash;
0303         hash = (hash << 16) ^ tmp;
0304         s += 2;
0305         hash += hash >> 11;
0306     }
0307
0308     // Handle end case
0309     if (rem) {
0310         hash += (unsigned char)s[0];
0311         hash ^= hash << 11;
0312         hash += hash >> 17;
0313     }
0314
0315     // Force "avalanching" of final 127 bits
0316     hash ^= hash << 3;
0317     hash += hash >> 5;
0318     hash ^= hash << 2;
0319     hash += hash >> 15;
0320     hash ^= hash << 10;
0321
0322     // this avoids ever returning a hash code of 0, since that is used to
0323     // signal "hash not computed yet", using a value that is likely to be
0324     // effectively the same as 0 when the low bits are masked
0325     if (hash == 0) {
0326         hash = 0x80000000;
0327     }
0328
0329     return hash;
0330 }
0331
0332 unsigned UString::Rep::computeHash(const char *s)
0333 {
0334     return computeHash(s, strlen(s));
0335 }
0336
0337 // put these early so they can be inlined
0338 inline size_t UString::expandedSize(size_t size, size_t otherSize) const
0339 {
0340     // Do the size calculation in two parts, returning overflowIndicator if
0341     // we overflow the maximum value that we can handle.
0342
0343     if (size > maxUChars()) {
0344         return overflowIndicator();
0345     }
0346
0347     size_t expandedSize = ((size + 10) / 10 * 11) + 1;
0348     if (maxUChars() - expandedSize < otherSize) {
0349         return overflowIndicator();
0350     }
0351
0352     return expandedSize + otherSize;
0353 }
0354
0355 inline int UString::usedCapacity() const
0356 {
0357     return m_rep->baseString->usedCapacity;
0358 }
0359
0360 inline int UString::usedPreCapacity() const
0361 {
0362     return m_rep->baseString->usedPreCapacity;
0363 }
0364
0365 void UString::expandCapacity(int requiredLength)
0366 {
0367     Rep *r = m_rep->baseString;
0368
0369     if (requiredLength > r->capacity) {
0370         size_t newCapacity = expandedSize(requiredLength, r->preCapacity);
0371         UChar *oldBuf = r->buf;
0372         r->buf = reallocChars(r->buf, newCapacity);
0373         if (!r->buf) {
0374             r->buf = oldBuf;
0375             m_rep = &Rep::null;
0376             return;
0377         }
0378         r->capacity = newCapacity - r->preCapacity;
0379     }
0380     if (requiredLength > r->usedCapacity) {
0381         r->usedCapacity = requiredLength;
0382     }
0383 }
0384
0385 void UString::expandPreCapacity(int requiredPreCap)
0386 {
0387     Rep *r = m_rep->baseString;
0388
0389     if (requiredPreCap > r->preCapacity) {
0390         size_t newCapacity = expandedSize(requiredPreCap, r->capacity);
0391         int delta = newCapacity - r->capacity - r->preCapacity;
0392
0393         UChar *newBuf = allocChars(newCapacity);
0394         if (!newBuf) {
0395             m_rep = &Rep::null;
0396             return;
0397         }
0398         memcpy(newBuf + delta, r->buf, (r->capacity + r->preCapacity) * sizeof(UChar));
0399         fastFree(r->buf);
0400         r->buf = newBuf;
0401
0402         r->preCapacity = newCapacity - r->capacity;
0403     }
0404     if (requiredPreCap > r->usedPreCapacity) {
0405         r->usedPreCapacity = requiredPreCap;
0406     }
0407 }
0408
0409 UString::UString(Empty)
0410     : m_rep(&Rep::empty)
0411 {
0412 }
0413
0414 UString::UString(char c)
0415     : m_rep(Rep::create(allocChars(1), 1))
0416 {
0417     m_rep->buf[0] = static_cast<unsigned char>(c);
0418 }
0419
0420 UString::UString(const char *c)
0421 {
0422     if (!c) {
0423         m_rep = &Rep::null;
0424         return;
0425     }
0426
0427     if (!c[0]) {
0428         m_rep = &Rep::empty;
0429         return;
0430     }
0431
0432     size_t length = strlen(c);
0433     UChar *d = allocChars(length);
0434     if (!d) {
0435         m_rep = &Rep::null;
0436     } else {
0437         for (size_t i = 0; i < length; i++) {
0438             d[i].uc = c[i];
0439         }
0440         m_rep = Rep::create(d, static_cast<int>(length));
0441     }
0442 }
0443
0444 UString::UString(const char *c, size_t length)
0445 {
0446     if (!c) {
0447         m_rep = &Rep::null;
0448         return;
0449     }
0450
0451     if (length == 0) {
0452         m_rep = &Rep::empty;
0453         return;
0454     }
0455
0456     UChar *d = allocChars(length);
0457     if (!d) {
0458         m_rep = &Rep::null;
0459     } else {
0460         for (size_t i = 0; i < length; i++) {
0461             d[i].uc = c[i];
0462         }
0463         m_rep = Rep::create(d, static_cast<int>(length));
0464     }
0465 }
0466
0467 UString::UString(const UChar *c, int length)
0468 {
0469     if (length == 0) {
0470         m_rep = &Rep::empty;
0471     } else {
0472         m_rep = Rep::createCopying(c, length);
0473     }
0474 }
0475
0476 UString::UString(UChar *c, int length, bool copy)
0477 {
0478     if (length == 0) {
0479         m_rep = &Rep::empty;
0480     } else if (copy) {
0481         m_rep = Rep::createCopying(c, length);
0482     } else {
0483         m_rep = Rep::create(c, length);
0484     }
0485 }
0486
0487 UString::UString(const Vector<UChar> &buffer)
0488 {
0489     if (!buffer.size()) {
0490         m_rep = &Rep::empty;
0491     } else {
0492         m_rep = Rep::createCopying(buffer.data(), buffer.size());
0493     }
0494 }
0495
// Concatenating constructor: builds the string a + b.
//
// Where possible the result shares a buffer with one of the operands instead
// of copying both: b's characters are written just past the end of a's buffer
// ("shared append"), or a's characters just before the start of b's buffer
// ("shared prepend"). Otherwise a fresh buffer is allocated and both operands
// are copied into it. If a needed allocation fails, the result is the null
// string.
UString::UString(const UString &a, const UString &b)
{
    int aSize = a.size();
    int aOffset = a.m_rep->offset;
    int bSize = b.size();
    int bOffset = b.m_rep->offset;
    int length = aSize + bSize;

    // possible cases:

    if (aSize == 0) {
        // a is empty
        m_rep = b.m_rep;
    } else if (bSize == 0) {
        // b is empty
        m_rep = a.m_rep;
    } else if (aOffset + aSize == a.usedCapacity() && aSize >= minShareSize && 4 * aSize >= bSize &&
               (-bOffset != b.usedPreCapacity() || aSize >= bSize)) {
        // - a reaches the end of its buffer so it qualifies for shared append
        // - also, it's at least a quarter the length of b - appending to a much shorter
        //   string does more harm than good
        // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
        // x shares a's rep, so growing x's capacity grows the base buffer a also uses.
        UString x(a);
        x.expandCapacity(aOffset + length);
        // expandCapacity turns x into the null string on allocation failure.
        if (a.data() && x.data()) {
            memcpy(const_cast<UChar *>(a.data() + aSize), b.data(), bSize * sizeof(UChar));
            m_rep = Rep::create(a.m_rep, 0, length);
        } else {
            m_rep = &Rep::null;
        }
    } else if (-bOffset == b.usedPreCapacity() && bSize >= minShareSize && 4 * bSize >= aSize) {
        // - b reaches the beginning of its buffer so it qualifies for shared prepend
        // - also, it's at least a quarter the length of a - prepending to a much shorter
        //   string does more harm than good
        // y shares b's rep, so growing y's pre-capacity grows the base buffer b also uses.
        UString y(b);
        y.expandPreCapacity(-bOffset + aSize);
        // expandPreCapacity turns y into the null string on allocation failure.
        if (b.data() && y.data()) {
            memcpy(const_cast<UChar *>(b.data() - aSize), a.data(), aSize * sizeof(UChar));
            m_rep = Rep::create(b.m_rep, -aSize, length);
        } else {
            m_rep = &Rep::null;
        }
    } else {
        // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
        size_t newCapacity = expandedSize(length, 0);
        UChar *d = allocChars(newCapacity);
        if (!d) {
            m_rep = &Rep::null;
        } else {
            memcpy(d, a.data(), aSize * sizeof(UChar));
            memcpy(d + aSize, b.data(), bSize * sizeof(UChar));
            m_rep = Rep::create(d, length);
            // Rep::create set capacity to `length`; record the real (larger) allocation size.
            m_rep->capacity = newCapacity;
        }
    }
}
0552
0553 const UString &UString::null()
0554 {
0555     static UString *n = new UString;
0556     return *n;
0557 }
0558
0559 UString UString::from(int i)
0560 {
0561     UChar buf[1 + sizeof(i) * 3];
0562     UChar *end = buf + sizeof(buf) / sizeof(UChar);
0563     UChar *p = end;
0564
0565     if (i == 0) {
0566         *--p = '0';
0567     } else if (i == INT_MIN) {
0568         char minBuf[1 + sizeof(i) * 3];
0569         sprintf(minBuf, "%d", INT_MIN);
0570         return UString(minBuf);
0571     } else {
0572         bool negative = false;
0573         if (i < 0) {
0574             negative = true;
0575             i = -i;
0576         }
0577         while (i) {
0578             *--p = (unsigned short)((i % 10) + '0');
0579             i /= 10;
0580         }
0581         if (negative) {
0582             *--p = '-';
0583         }
0584     }
0585
0586     return UString(p, static_cast<int>(end - p));
0587 }
0588
0589 UString UString::from(unsigned int u)
0590 {
0591     UChar buf[sizeof(u) * 3];
0592     UChar *end = buf + sizeof(buf) / sizeof(UChar);
0593     UChar *p = end;
0594
0595     if (u == 0) {
0596         *--p = '0';
0597     } else {
0598         while (u) {
0599             *--p = (unsigned short)((u % 10) + '0');
0600             u /= 10;
0601         }
0602     }
0603
0604     return UString(p, static_cast<int>(end - p));
0605 }
0606
0607 UString UString::from(long l)
0608 {
0609     UChar buf[1 + sizeof(l) * 3];
0610     UChar *end = buf + sizeof(buf) / sizeof(UChar);
0611     UChar *p = end;
0612
0613     if (l == 0) {
0614         *--p = '0';
0615     } else if (l == LONG_MIN) {
0616         char minBuf[1 + sizeof(l) * 3];
0617         sprintf(minBuf, "%ld", LONG_MIN);
0618         return UString(minBuf);
0619     } else {
0620         bool negative = false;
0621         if (l < 0) {
0622             negative = true;
0623             l = -l;
0624         }
0625         while (l) {
0626             *--p = (unsigned short)((l % 10) + '0');
0627             l /= 10;
0628         }
0629         if (negative) {
0630             *--p = '-';
0631         }
0632     }
0633
0634     return UString(p, static_cast<int>(end - p));
0635 }
0636
// Returns the string representation of the double `d`.
//
// The digits come from kjs_dtoa(); this function only decides the layout:
//   - NaN                               -> "NaN"
//   - -6 < decimalPoint <= 0            -> "0." + leading zeros + digits
//   - 0 < decimalPoint <= 21            -> plain decimal, zero-padded or with a '.'
//   - digit string not starting 0-9     -> copied through verbatim
//   - everything else                   -> exponential form d[.ddd]e(+|-)NN
// A leading '-' is written first whenever kjs_dtoa reports a sign.
UString UString::from(double d)
{
    // avoid ever printing -NaN, in JS conceptually there is only one NaN value
    if (isNaN(d)) {
        return UString("NaN", 3);
    }

    // Output is assembled here; `i` (below) is the write position. The buffer is
    // not NUL-terminated at the end - the final length is passed explicitly.
    char buf[80];
    int decimalPoint;
    int sign;

    // NOTE(review): presumably `result` is the NUL-terminated digit string and
    // `decimalPoint` the position of the decimal point relative to its start -
    // confirm against dtoa.h.
    char *result = kjs_dtoa(d, 0, 0, &decimalPoint, &sign, nullptr);
    int length = static_cast<int>(strlen(result));

    int i = 0;
    if (sign) {
        buf[i++] = '-';
    }

    if (decimalPoint <= 0 && decimalPoint > -6) {
        // Small magnitude: "0." followed by -decimalPoint zeros, then the digits.
        buf[i++] = '0';
        buf[i++] = '.';
        for (int j = decimalPoint; j < 0; j++) {
            buf[i++] = '0';
        }
        strcpy(buf + i, result);
        i += length;
    } else if (decimalPoint <= 21 && decimalPoint > 0) {
        if (length <= decimalPoint) {
            // All digits are before the decimal point; pad with trailing zeros.
            strcpy(buf + i, result);
            i += length;
            for (int j = 0; j < decimalPoint - length; j++) {
                buf[i++] = '0';
            }
//      buf[i] = '\0';
        } else {
            // The decimal point falls inside the digit string.
            strncpy(buf + i, result, decimalPoint);
            i += decimalPoint;
            buf[i++] = '.';
            strcpy(buf + i, result + decimalPoint);
            i += length - decimalPoint;
        }
    } else if (result[0] < '0' || result[0] > '9') {
        // Not a digit string (NOTE(review): presumably "Infinity" - confirm); copy as is.
        strcpy(buf + i, result);
        i += length;
    } else {
        // Exponential form: first digit, optional fraction, then the exponent.
        buf[i++] = result[0];
        if (length > 1) {
            buf[i++] = '.';
            strcpy(buf + i, result + 1);
            i += length - 1;
        }

        buf[i++] = 'e';
        buf[i++] = (decimalPoint >= 0) ? '+' : '-';
        // decimalPoint can't be more than 3 digits decimal given the
        // nature of float representation
        int exponential = decimalPoint - 1;
        if (exponential < 0) {
            exponential = exponential * -1;
        }
        // Emit the exponent magnitude without leading zeros (1 to 3 digits).
        if (exponential >= 100) {
            buf[i++] = '0' + exponential / 100;
        }
        if (exponential >= 10) {
            buf[i++] = '0' + (exponential % 100) / 10;
        }
        buf[i++] = '0' + exponential % 10;
//    buf[i++] = '\0';
    }

    kjs_freedtoa(result);

    return UString(buf, i);
}
0712
0713 UString UString::spliceSubstringsWithSeparators(const Range *substringRanges, int rangeCount, const UString *separators, int separatorCount) const
0714 {
0715     if (rangeCount == 1 && separatorCount == 0) {
0716         int thisSize = size();
0717         int position = substringRanges[0].position;
0718         int length = substringRanges[0].length;
0719         if (position <= 0 && length >= thisSize) {
0720             return *this;
0721         }
0722         return UString::Rep::create(m_rep, maxInt(0, position), minInt(thisSize, length));
0723     }
0724
0725     int totalLength = 0;
0726     for (int i = 0; i < rangeCount; i++) {
0727         totalLength += substringRanges[i].length;
0728     }
0729     for (int i = 0; i < separatorCount; i++) {
0730         totalLength += separators[i].size();
0731     }
0732
0733     if (totalLength == 0) {
0734         return "";
0735     }
0736
0737     UChar *buffer = allocChars(totalLength);
0738     if (!buffer) {
0739         return null();
0740     }
0741
0742     int maxCount = max(rangeCount, separatorCount);
0743     int bufferPos = 0;
0744     for (int i = 0; i < maxCount; i++) {
0745         if (i < rangeCount) {
0746             memcpy(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length * sizeof(UChar));
0747             bufferPos += substringRanges[i].length;
0748         }
0749         if (i < separatorCount) {
0750             memcpy(buffer + bufferPos, separators[i].data(), separators[i].size() * sizeof(UChar));
0751             bufferPos += separators[i].size();
0752         }
0753     }
0754
0755     return UString::Rep::create(buffer, totalLength);
0756 }
0757
0758 // Append a sub-string of <subStr> to this string.
0759 // Equivalent to append(subStr.substr(subPos, subLength))
0760
0761 UString &UString::append(const UString &subStr, int subPos, int subLength)
0762 {
0763     int subSize = subStr.size();
0764
0765     if (subPos < 0) {
0766         subPos = 0;
0767     } else if (subPos >= subSize) {
0768         subPos = subSize;
0769     }
0770     if (subLength < 0) {
0771         subLength = subSize;
0772     }
0773     if (subPos + subLength >= subSize) {
0774         subLength = subSize - subPos;
0775     }
0776
0777     return append(UString(subStr.data() + subPos, subLength));
0778 }
0779
// Appends the string `t` to this string and returns *this.
//
// Depending on how this string's rep is shared, the characters are either
// written in place, written into the shared base buffer behind a new rep, or
// copied into a freshly allocated buffer. If the new buffer cannot be
// allocated this string becomes the null string.
UString &UString::append(const UString &t)
{
    int thisSize = size();
    int thisOffset = m_rep->offset;
    int tSize = t.size();
    int length = thisSize + tSize;

    // possible cases:
    if (thisSize == 0) {
        // this is empty
        *this = t;
    } else if (tSize == 0) {
        // t is empty
    } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
        // this is direct and has refcount of 1 (so we can just alter it directly)
        expandCapacity(thisOffset + length);
        // expandCapacity switches to the null rep on allocation failure; skip the copy then.
        if (data()) {
            memcpy(const_cast<UChar *>(data() + thisSize), t.data(), tSize * sizeof(UChar));
            m_rep->len = length;
            // The contents changed, so any cached hash is stale.
            m_rep->_hash = 0;
        }
    } else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
        // this reaches the end of the buffer - extend it if it's long enough to append to
        expandCapacity(thisOffset + length);
        if (data()) {
            memcpy(const_cast<UChar *>(data() + thisSize), t.data(), tSize * sizeof(UChar));
            // Other reps may share the buffer, so expose the longer range through a new rep.
            m_rep = Rep::create(m_rep, 0, length);
        }
    } else {
        // this is shared with someone using more capacity, gotta make a whole new string
        size_t newCapacity = expandedSize(length, 0);
        UChar *d = allocChars(newCapacity);
        if (!d) {
            m_rep = &Rep::null;
        } else {
            memcpy(d, data(), thisSize * sizeof(UChar));
            memcpy(const_cast<UChar *>(d + thisSize), t.data(), tSize * sizeof(UChar));
            m_rep = Rep::create(d, length);
            // Rep::create set capacity to `length`; record the real (larger) allocation size.
            m_rep->capacity = newCapacity;
        }
    }

    return *this;
}
0824
// Appends the NUL-terminated 8-bit string `t` to this string and returns *this.
// `t` must not be null (it is passed to strlen).
//
// Same strategy as append(const UString &): write in place, write into the
// shared base buffer behind a new rep, or copy into a freshly allocated
// buffer. If the new buffer cannot be allocated this string becomes the null
// string.
UString &UString::append(const char *t)
{
    int thisSize = size();
    int thisOffset = m_rep->offset;
    int tSize = static_cast<int>(strlen(t));
    int length = thisSize + tSize;

    // possible cases:
    if (thisSize == 0) {
        // this is empty
        *this = t;
    } else if (tSize == 0) {
        // t is empty, we'll just return *this below.
    } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
        // this is direct and has refcount of 1 (so we can just alter it directly)
        expandCapacity(thisOffset + length);
        // expandCapacity switches to the null rep on allocation failure; d is checked below.
        UChar *d = const_cast<UChar *>(data());
        if (d) {
            for (int i = 0; i < tSize; ++i) {
                d[thisSize + i] = t[i];
            }
            m_rep->len = length;
            // The contents changed, so any cached hash is stale.
            m_rep->_hash = 0;
        }
    } else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
        // this string reaches the end of the buffer - extend it
        expandCapacity(thisOffset + length);
        UChar *d = const_cast<UChar *>(data());
        if (d) {
            for (int i = 0; i < tSize; ++i) {
                d[thisSize + i] = t[i];
            }
            // Other reps may share the buffer, so expose the longer range through a new rep.
            m_rep = Rep::create(m_rep, 0, length);
        }
    } else {
        // this is shared with someone using more capacity, gotta make a whole new string
        size_t newCapacity = expandedSize(length, 0);
        UChar *d = allocChars(newCapacity);
        if (!d) {
            m_rep = &Rep::null;
        } else {
            memcpy(d, data(), thisSize * sizeof(UChar));
            for (int i = 0; i < tSize; ++i) {
                d[thisSize + i] = t[i];
            }
            m_rep = Rep::create(d, length);
            // Rep::create set capacity to `length`; record the real (larger) allocation size.
            m_rep->capacity = newCapacity;
        }
    }

    return *this;
}
0877
// Appends the single code unit `c` to this string and returns *this.
//
// Same strategy as the other append() overloads, with an extra first case
// that gives an empty string a buffer of its own. If a new buffer cannot be
// allocated this string becomes the null string.
UString &UString::append(unsigned short c)
{
    int thisOffset = m_rep->offset;
    int length = size();

    // possible cases:
    if (length == 0) {
        // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
        size_t newCapacity = expandedSize(1, 0);
        UChar *d = allocChars(newCapacity);
        if (!d) {
            m_rep = &Rep::null;
        } else {
            d[0] = c;
            m_rep = Rep::create(d, 1);
            // Rep::create set capacity to 1; record the real (larger) allocation size.
            m_rep->capacity = newCapacity;
        }
    } else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
        // this is direct and has refcount of 1 (so we can just alter it directly)
        expandCapacity(thisOffset + length + 1);
        // expandCapacity switches to the null rep on allocation failure; d is checked below.
        UChar *d = const_cast<UChar *>(data());
        if (d) {
            d[length] = c;
            m_rep->len = length + 1;
            // The contents changed, so any cached hash is stale.
            m_rep->_hash = 0;
        }
    } else if (thisOffset + length == usedCapacity() && length >= minShareSize) {
        // this reaches the end of the string - extend it and share
        expandCapacity(thisOffset + length + 1);
        UChar *d = const_cast<UChar *>(data());
        if (d) {
            d[length] = c;
            // Other reps may share the buffer, so expose the longer range through a new rep.
            m_rep = Rep::create(m_rep, 0, length + 1);
        }
    } else {
        // this is shared with someone using more capacity, gotta make a whole new string
        size_t newCapacity = expandedSize(length + 1, 0);
        UChar *d = allocChars(newCapacity);
        if (!d) {
            m_rep = &Rep::null;
        } else {
            memcpy(d, data(), length * sizeof(UChar));
            d[length] = c;
            m_rep = Rep::create(d, length + 1);
            // Rep::create set capacity to length + 1; record the real (larger) allocation size.
            m_rep->capacity = newCapacity;
        }
    }

    return *this;
}
0928
0929 CString UString::cstring() const
0930 {
0931     return ascii();
0932 }
0933
0934 char *UString::ascii() const
0935 {
0936     // Never make the buffer smaller than normalStatBufferSize.
0937     // Thus we almost never need to reallocate.
0938     int length = size();
0939     int neededSize = length + 1;
0940     if (neededSize < normalStatBufferSize) {
0941         neededSize = normalStatBufferSize;
0942     }
0943     if (neededSize != statBufferSize) {
0944         delete [] statBuffer;
0945         statBuffer = new char [neededSize];
0946         statBufferSize = neededSize;
0947     }
0948
0949     const UChar *p = data();
0950     char *q = statBuffer;
0951     const UChar *limit = p + length;
0952     while (p != limit) {
0953         *q = static_cast<char>(p->uc);
0954         ++p;
0955         ++q;
0956     }
0957     *q = '\0';
0958
0959     return statBuffer;
0960 }
0961
0962 UString &UString::operator=(Empty)
0963 {
0964     m_rep = &Rep::empty;
0965
0966     return *this;
0967 }
0968
0969 UString &UString::operator=(const char *c)
0970 {
0971     set(c, c ? strlen(c) : 0);
0972
0973     return *this;
0974 }
0975
0976 void UString::set(const char *c, int l)
0977 {
0978     if (!c) {
0979         m_rep = &Rep::null;
0980         return;
0981     }
0982
0983     if (l == 0) {
0984         m_rep = &Rep::empty;
0985         return;
0986     }
0987
0988     UChar *d;
0989     if (m_rep->rc == 1 && l <= m_rep->capacity && m_rep->baseIsSelf() && m_rep->offset == 0 && m_rep->preCapacity == 0) {
0990         d = m_rep->buf;
0991         m_rep->_hash = 0;
0992         m_rep->len = l;
0993     } else {
0994         d = allocChars(l);
0995         if (!d) {
0996             m_rep = &Rep::null;
0997             return;
0998         }
0999         m_rep = Rep::create(d, l);
1000     }
1001     for (int i = 0; i < l; i++) {
1002         d[i].uc = static_cast<unsigned char>(c[i]);
1003     }
1004 }
1005
1006 bool UString::is8Bit() const
1007 {
1008     const UChar *u = data();
1009     const UChar *limit = u + size();
1010     while (u < limit) {
1011         if (u->uc > 0xFF) {
1012             return false;
1013         }
1014         ++u;
1015     }
1016
1017     return true;
1018 }
1019
1020 const UChar UString::operator[](int pos) const
1021 {
1022     if (pos >= size()) {
1023         return '\0';
1024     }
1025     return data()[pos];
1026 }
1027
1028 double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
1029 {
1030     double d;
1031
1032     const int length = size();
1033     int leadingSpaces = 0;
1034
1035     // skip leading white space
1036     while (leadingSpaces < length && CommonUnicode::isStrWhiteSpace(data()[leadingSpaces].uc)) {
1037         ++leadingSpaces;
1038     }
1039
1040     UString whitespaceSkipped = substr(leadingSpaces, length - leadingSpaces);
1041
1042     // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
1043     // after the number, so is8Bit is too strict a check.
1044     if (!whitespaceSkipped.is8Bit()) {
1045         return NaN;
1046     }
1047
1048     const char *c = whitespaceSkipped.ascii();
1049
1050     // empty string ?
1051     if (*c == '\0') {
1052         return tolerateEmptyString ? 0.0 : NaN;
1053     }
1054
1055     // hex number ?
1056     if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
1057         const char *firstDigitPosition = c + 2;
1058         c++;
1059         d = 0.0;
1060         while (*(++c)) {
1061             if (*c >= '0' && *c <= '9') {
1062                 d = d * 16.0 + *c - '0';
1063             } else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f')) {
1064                 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
1065             } else {
1066                 break;
1067             }
1068         }
1069
1070         if (d >= mantissaOverflowLowerBound) {
1071             d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
1072         }
1073     } else {
1074         // regular number ?
1075         char *end;
1076         d = kjs_strtod(c, &end);
1077         if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
1078             c = end;
1079         } else {
1080             double sign = 1.0;
1081
1082             if (*c == '+') {
1083                 c++;
1084             } else if (*c == '-') {
1085                 sign = -1.0;
1086                 c++;
1087             }
1088
1089             // We used strtod() to do the conversion. However, strtod() handles
1090             // infinite values slightly differently than JavaScript in that it
1091             // converts the string "inf" with any capitalization to infinity,
1092             // whereas the ECMA spec requires that it be converted to NaN.
1093
1094             if (strncmp(c, "Infinity", 8) == 0) {
1095                 d = sign * Inf;
1096                 c += 8;
1097             } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i') {
1098                 c = end;
1099             } else {
1100                 return NaN;
1101             }
1102         }
1103     }
1104
1105     // allow trailing white space
1106     while (isASCIISpace(*c)) {
1107         c++;
1108     }
1109     // don't allow anything after - unless tolerant=true
1110     if (!tolerateTrailingJunk && *c != '\0') {
1111         d = NaN;
1112     }
1113
1114     return d;
1115 }
1116
1117 #ifdef __FAST_MATH__
1118 # error "KJS does not work correctly with -ffast-math"
1119 #endif
1120
1121 double UString::toDouble(bool tolerateTrailingJunk) const
1122 {
1123     return toDouble(tolerateTrailingJunk, true);
1124 }
1125
1126 double UString::toDouble() const
1127 {
1128     return toDouble(false, true);
1129 }
1130
1131 uint32_t UString::toStrictUInt32(bool *ok) const
1132 {
1133     if (ok) {
1134         *ok = false;
1135     }
1136
1137     // Empty string is not OK.
1138     int len = m_rep->len;
1139     if (len == 0) {
1140         return 0;
1141     }
1142     const UChar *p = m_rep->data();
1143     unsigned short c = p->unicode();
1144
1145     // If the first digit is 0, only 0 itself is OK.
1146     if (c == '0') {
1147         if (len == 1 && ok) {
1148             *ok = true;
1149         }
1150         return 0;
1151     }
1152
1153     // Convert to UInt32, checking for overflow.
1154     uint32_t i = 0;
1155     while (1) {
1156         // Process character, turning it into a digit.
1157         if (c < '0' || c > '9') {
1158             return 0;
1159         }
1160         const unsigned d = c - '0';
1161
1162         // Multiply by 10, checking for overflow out of 32 bits.
1163         if (i > 0xFFFFFFFFU / 10) {
1164             return 0;
1165         }
1166         i *= 10;
1167
1168         // Add in the digit, checking for overflow out of 32 bits.
1169         const unsigned max = 0xFFFFFFFFU - d;
1170         if (i > max) {
1171             return 0;
1172         }
1173         i += d;
1174
1175         // Handle end of string.
1176         if (--len == 0) {
1177             if (ok) {
1178                 *ok = true;
1179             }
1180             return i;
1181         }
1182
1183         // Get next character.
1184         c = (++p)->unicode();
1185     }
1186 }
1187
1188 int UString::find(const UString &f, int pos) const
1189 {
1190     int sz = size();
1191     int fsz = f.size();
1192     if (sz < fsz) {
1193         return -1;
1194     }
1195     if (pos < 0) {
1196         pos = 0;
1197     }
1198     if (fsz == 0) {
1199         return pos;
1200     }
1201     const UChar *data_ = data();
1202     const UChar *end = data_ + sz - fsz;
1203     int fsizeminusone = (fsz - 1) * sizeof(UChar);
1204     const UChar *fdata = f.data();
1205     unsigned short fchar = fdata->uc;
1206     ++fdata;
1207     for (const UChar *c = data_ + pos; c <= end; c++)
1208         if (c->uc == fchar && !memcmp(c + 1, fdata, fsizeminusone)) {
1209             return (c - data_);
1210         }
1211
1212     return -1;
1213 }
1214
1215 int UString::find(UChar ch, int pos) const
1216 {
1217     if (pos < 0) {
1218         pos = 0;
1219     }
1220     const UChar *data_ = data();
1221     const UChar *end = data_ + size();
1222     for (const UChar *c = data_ + pos; c < end; c++)
1223         if (*c == ch) {
1224             return (c - data_);
1225         }
1226
1227     return -1;
1228 }
1229
1230 int UString::rfind(const UString &f, int pos) const
1231 {
1232     int sz = size();
1233     int fsz = f.size();
1234     if (sz < fsz) {
1235         return -1;
1236     }
1237     if (pos < 0) {
1238         pos = 0;
1239     }
1240     if (pos > sz - fsz) {
1241         pos = sz - fsz;
1242     }
1243     if (fsz == 0) {
1244         return pos;
1245     }
1246     int fsizeminusone = (fsz - 1) * sizeof(UChar);
1247     const UChar *fdata = f.data();
1248     const UChar *data_ = data();
1249     for (const UChar *c = data_ + pos; c >= data_; c--) {
1250         if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone)) {
1251             return (c - data_);
1252         }
1253     }
1254
1255     return -1;
1256 }
1257
1258 int UString::rfind(UChar ch, int pos) const
1259 {
1260     if (isEmpty()) {
1261         return -1;
1262     }
1263     if (pos + 1 >= size()) {
1264         pos = size() - 1;
1265     }
1266     const UChar *data_ = data();
1267     for (const UChar *c = data_ + pos; c >= data_; c--) {
1268         if (*c == ch) {
1269             return (c - data_);
1270         }
1271     }
1272
1273     return -1;
1274 }
1275
1276 UString UString::substr(int pos, int len) const
1277 {
1278     int s = size();
1279
1280     if (pos < 0) {
1281         pos = 0;
1282     } else if (pos >= s) {
1283         pos = s;
1284     }
1285     if (len < 0) {
1286         len = s;
1287     }
1288     if (pos + len >= s) {
1289         len = s - pos;
1290     }
1291
1292     if (pos == 0 && len == s) {
1293         return *this;
1294     }
1295
1296     return UString(Rep::create(m_rep, pos, len));
1297 }
1298
1299 size_t UString::maxUChars() {
1300   return ::KJS::maxUChars();
1301 }
1302
1303 void UString::copyForWriting()
1304 {
1305     int l = size();
1306     if (!l) {
1307         return;    // Not going to touch anything anyway.
1308     }
1309     if (m_rep->rc > 1 || !m_rep->baseIsSelf()) {
1310         UChar *n = allocChars(l);
1311         memcpy(n, data(), l * sizeof(UChar));
1312         m_rep = Rep::create(n, l);
1313     }
1314 }
1315
1316 bool operator==(const UString &s1, const UString &s2)
1317 {
1318 #if 0
1319     if (s1.m_rep == s2.m_rep) {
1320         return true;
1321     }
1322 #endif
1323
1324     if (s1.m_rep->len != s2.m_rep->len) {
1325         return false;
1326     }
1327
1328     return (memcmp(s1.m_rep->data(), s2.m_rep->data(),
1329                    s1.m_rep->len * sizeof(UChar)) == 0);
1330 }
1331
1332 bool operator==(const UString &s1, const char *s2)
1333 {
1334     if (s2 == nullptr) {
1335         return s1.isEmpty();
1336     }
1337
1338     const UChar *u = s1.data();
1339     const UChar *uend = u + s1.size();
1340     while (u != uend && *s2) {
1341         if (u->uc != (unsigned char)*s2) {
1342             return false;
1343         }
1344         s2++;
1345         u++;
1346     }
1347
1348     return u == uend && *s2 == 0;
1349 }
1350
1351 bool operator<(const UString &s1, const UString &s2)
1352 {
1353     const int l1 = s1.size();
1354     const int l2 = s2.size();
1355     const int lmin = l1 < l2 ? l1 : l2;
1356     const UChar *c1 = s1.data();
1357     const UChar *c2 = s2.data();
1358     int l = 0;
1359     while (l < lmin && *c1 == *c2) {
1360         c1++;
1361         c2++;
1362         l++;
1363     }
1364     if (l < lmin) {
1365         return (c1->uc < c2->uc);
1366     }
1367
1368     return (l1 < l2);
1369 }
1370
1371 bool UString::equal(const UString::Rep *r, const UString::Rep *b)
1372 {
1373     if (r == b) {
1374         return true;
1375     }
1376
1377     int length = r->len;
1378     if (length != b->len) {
1379         return false;
1380     }
1381
1382     const UChar *d = r->data();
1383     const UChar *s = b->data();
1384     for (int i = 0; i != length; ++i)
1385         if (d[i].uc != s[i].uc) {
1386             return false;
1387         }
1388     return true;
1389 }
1390
1391 int compare(const UString &s1, const UString &s2)
1392 {
1393     const int l1 = s1.size();
1394     const int l2 = s2.size();
1395     const int lmin = l1 < l2 ? l1 : l2;
1396     const UChar *c1 = s1.data();
1397     const UChar *c2 = s2.data();
1398     int l = 0;
1399     while (l < lmin && *c1 == *c2) {
1400         c1++;
1401         c2++;
1402         l++;
1403     }
1404
1405     if (l < lmin) {
1406         return (c1->uc > c2->uc) ? 1 : -1;
1407     }
1408
1409     if (l1 == l2) {
1410         return 0;
1411     }
1412
1413     return (l1 > l2) ? 1 : -1;
1414 }
1415
// Returns the length (2, 3 or 4) of the UTF-8 sequence introduced by the
// lead byte b0, or 0 when b0 is not a valid multi-byte lead byte. ASCII
// bytes and continuation bytes (below 0xC0) therefore also give 0.
inline int inlineUTF8SequenceLengthNonASCII(char b0)
{
    const unsigned char lead = static_cast<unsigned char>(b0);

    if (lead < 0xC0) {
        return 0; // top two bits are not both set: not a lead byte
    }
    if (lead < 0xE0) {
        return 2; // 110xxxxx
    }
    if (lead < 0xF0) {
        return 3; // 1110xxxx
    }
    if (lead < 0xF8) {
        return 4; // 11110xxx
    }
    return 0;     // 11111xxx never starts a sequence
}
1432
1433 int UTF8SequenceLengthNonASCII(char b0)
1434 {
1435     return inlineUTF8SequenceLengthNonASCII(b0);
1436 }
1437
1438 inline int inlineUTF8SequenceLength(char b0)
1439 {
1440     return (b0 & 0x80) == 0 ? 1 : UTF8SequenceLengthNonASCII(b0);
1441 }
1442
1443 // Given a first byte, gives the length of the UTF-8 sequence it begins.
1444 // Returns 0 for bytes that are not legal starts of UTF-8 sequences.
1445 // Only allows sequences of up to 4 bytes, since that works for all Unicode characters (U-00000000 to U-0010FFFF).
1446 int UTF8SequenceLength(char b0)
1447 {
1448     return (b0 & 0x80) == 0 ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
1449 }
1450
1451 // Takes a null-terminated C-style string with a UTF-8 sequence in it and converts it to a character.
1452 // Only allows Unicode characters (U-00000000 to U-0010FFFF).
1453 // Returns -1 if the sequence is not valid (including presence of extra bytes).
1454 int decodeUTF8Sequence(const char *sequence)
1455 {
1456     // Handle 0-byte sequences (never valid).
1457     const unsigned char b0 = sequence[0];
1458     const int length = inlineUTF8SequenceLength(b0);
1459     if (length == 0) {
1460         return -1;
1461     }
1462
1463     // Handle 1-byte sequences (plain ASCII).
1464     const unsigned char b1 = sequence[1];
1465     if (length == 1) {
1466         if (b1) {
1467             return -1;
1468         }
1469         return b0;
1470     }
1471
1472     // Handle 2-byte sequences.
1473     if ((b1 & 0xC0) != 0x80) {
1474         return -1;
1475     }
1476     const unsigned char b2 = sequence[2];
1477     if (length == 2) {
1478         if (b2) {
1479             return -1;
1480         }
1481         const int c = ((b0 & 0x1F) << 6) | (b1 & 0x3F);
1482         if (c < 0x80) {
1483             return -1;
1484         }
1485         return c;
1486     }
1487
1488     // Handle 3-byte sequences.
1489     if ((b2 & 0xC0) != 0x80) {
1490         return -1;
1491     }
1492     const unsigned char b3 = sequence[3];
1493     if (length == 3) {
1494         if (b3) {
1495             return -1;
1496         }
1497         const int c = ((b0 & 0xF) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F);
1498         if (c < 0x800) {
1499             return -1;
1500         }
1501         // UTF-16 surrogates should never appear in UTF-8 data.
1502         if (c >= 0xD800 && c <= 0xDFFF) {
1503             return -1;
1504         }
1505         // Backwards BOM and U+FFFF should never appear in UTF-8 data.
1506         if (c == 0xFFFE || c == 0xFFFF) {
1507             return -1;
1508         }
1509         return c;
1510     }
1511
1512     // Handle 4-byte sequences.
1513     if ((b3 & 0xC0) != 0x80) {
1514         return -1;
1515     }
1516     const unsigned char b4 = sequence[4];
1517     if (length == 4) {
1518         if (b4) {
1519             return -1;
1520         }
1521         const int c = ((b0 & 0x7) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
1522         if (c < 0x10000 || c > 0x10FFFF) {
1523             return -1;
1524         }
1525         return c;
1526     }
1527
1528     return -1;
1529 }
1530
1531 CString UString::UTF8String() const
1532 {
1533     // Allocate a buffer big enough to hold all the characters.
1534     const int length = size();
1535     Vector<char, 1024> buffer(length * 3);
1536
1537     // Convert to runs of 8-bit characters.
1538     char *p = buffer.begin();
1539     const unsigned short *d = &data()->uc;
1540     for (int i = 0; i != length; ++i) {
1541         unsigned int c = d[i], sc;
1542         if (c < 0x80) {
1543             *p++ = (char)c;
1544         } else if (c < 0x800) {
1545             *p++ = (char)((c >> 6) | 0xC0); // C0 is the 2-byte flag for UTF-8
1546             *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
1547         } else if (c >= 0xD800 && c <= 0xDBFF && (i + 1) < length &&
1548                    (sc = d[i + 1]) >= 0xDC00 && sc <= 0xDFFF) {
1549             sc = 0x10000 + (((c & 0x3FF) << 10) | (sc & 0x3FF));
1550             *p++ = (char)((sc >> 18) | 0xF0); // F0 is the 4-byte flag for UTF-8
1551             *p++ = (char)(((sc >> 12) | 0x80) & 0xBF); // next 6 bits, with high bit set
1552             *p++ = (char)(((sc >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set
1553             *p++ = (char)((sc | 0x80) & 0xBF); // next 6 bits, with high bit set
1554             ++i;
1555         } else {
1556             *p++ = (char)((c >> 12) | 0xE0); // E0 is the 3-byte flag for UTF-8
1557             *p++ = (char)(((c >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set
1558             *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
1559         }
1560     }
1561
1562     // Return the result as a C string.
1563     CString result(buffer.data(), p - buffer.data());
1564
1565     return result;
1566 }
1567
1568 } // namespace KJS