File indexing completed on 2024-05-05 16:13:51

0001 /*
0002     SPDX-FileCopyrightText: 2013 Szókovács Róbert <szo@szo.hu>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "legacycodec.h"
0008 
0009 QByteArray LegacyCodec::encodeFileNameUTF8(const QString &fileName)
0010 {
0011     int len = fileName.length();
0012     const QChar *uc = fileName.constData();
0013 
0014     uchar replacement = '?';
0015     int rlen = 3 * len;
0016     int surrogate_high = -1;
0017 
0018     QByteArray rstr;
0019     rstr.resize(rlen);
0020     uchar *cursor = (uchar *)rstr.data();
0021     const QChar *ch = uc;
0022     int invalid = 0;
0023 
0024     const QChar *end = ch + len;
0025     while (ch < end) {
0026         uint u = ch->unicode();
0027         if (surrogate_high >= 0) {
0028             if (ch->isLowSurrogate()) {
0029                 u = QChar::surrogateToUcs4(surrogate_high, u);
0030                 surrogate_high = -1;
0031             } else {
0032                 // high surrogate without low
0033                 *cursor = replacement;
0034                 ++ch;
0035                 ++invalid;
0036                 surrogate_high = -1;
0037                 continue;
0038             }
0039         } else if (ch->isLowSurrogate()) {
0040             // low surrogate without high
0041             *cursor = replacement;
0042             ++ch;
0043             ++invalid;
0044             continue;
0045         } else if (ch->isHighSurrogate()) {
0046             surrogate_high = u;
0047             ++ch;
0048             continue;
0049         }
0050 
0051         if (u >= 0x10FE00 && u <= 0x10FE7F) {
0052             *cursor++ = uchar(u - 0x10FE00 + 128);
0053         } else if (u < 0x80) {
0054             *cursor++ = uchar(u);
0055         } else {
0056             if (u < 0x0800) {
0057                 *cursor++ = 0xc0 | uchar(u >> 6);
0058             } else {
0059                 // is it one of the Unicode non-characters?
0060                 if (QChar::isNonCharacter(u)) {
0061                     *cursor++ = replacement;
0062                     ++ch;
0063                     ++invalid;
0064                     continue;
0065                 }
0066 
0067                 if (u > 0xffff) {
0068                     *cursor++ = 0xf0 | uchar(u >> 18);
0069                     *cursor++ = 0x80 | uchar((u >> 12) & 0x3f);
0070                 } else {
0071                     *cursor++ = 0xe0 | uchar((u >> 12) & 0x3f);
0072                 }
0073                 *cursor++ = 0x80 | uchar((u >> 6) & 0x3f);
0074             }
0075             *cursor++ = 0x80 | uchar(u & 0x3f);
0076         }
0077         ++ch;
0078     }
0079 
0080     rstr.resize(cursor - (const uchar *)rstr.constData());
0081     return rstr;
0082 }
0083 
0084 QString LegacyCodec::decodeFileNameUTF8(const QByteArray &localFileName)
0085 {
0086     const char *chars = localFileName.constData();
0087     const int len = localFileName.size();
0088     int need = 0;
0089     uint uc = 0;
0090     uint min_uc = 0;
0091 
0092     QString result(need + 2 * len + 1, Qt::Uninitialized); // worst case
0093     ushort *qch = (ushort *)result.unicode();
0094     uchar ch;
0095 
0096     for (int i = 0; i < len; ++i) {
0097         ch = chars[i];
0098         if (need) {
0099             if ((ch & 0xc0) == 0x80) {
0100                 uc = (uc << 6) | (ch & 0x3f);
0101                 --need;
0102                 if (!need) {
0103                     bool nonCharacter = QChar::isNonCharacter(uc);
0104                     if (!nonCharacter && uc > 0xffff && uc < 0x110000) {
0105                         // surrogate pair
0106                         Q_ASSERT((qch - (ushort *)result.unicode()) + 2 < result.length());
0107                         *qch++ = QChar::highSurrogate(uc);
0108                         *qch++ = QChar::lowSurrogate(uc);
0109                     } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || nonCharacter || uc >= 0x110000) {
0110                         // error: overlong sequence, UTF16 surrogate or non-character
0111                         goto error;
0112                     } else {
0113                         *qch++ = uc;
0114                     }
0115                 }
0116             } else {
0117                 goto error;
0118             }
0119         } else {
0120             if (ch < 128) {
0121                 *qch++ = ushort(ch);
0122             } else if ((ch & 0xe0) == 0xc0) {
0123                 uc = ch & 0x1f;
0124                 need = 1;
0125                 min_uc = 0x80;
0126             } else if ((ch & 0xf0) == 0xe0) {
0127                 uc = ch & 0x0f;
0128                 need = 2;
0129                 min_uc = 0x800;
0130             } else if ((ch & 0xf8) == 0xf0) {
0131                 uc = ch & 0x07;
0132                 need = 3;
0133                 min_uc = 0x10000;
0134             } else {
0135                 goto error;
0136             }
0137         }
0138     }
0139     if (need > 0) {
0140         // unterminated UTF sequence
0141         goto error;
0142     }
0143     result.truncate(qch - (ushort *)result.unicode());
0144     return result;
0145 
0146 error:
0147 
0148     qch = (ushort *)result.unicode();
0149     for (int i = 0; i < len; ++i) {
0150         ch = chars[i];
0151         if (ch < 128) {
0152             *qch++ = ushort(ch);
0153         } else {
0154             uint uc = ch - 128 + 0x10FE00; // U+10FE00-U+10FE7F
0155             *qch++ = QChar::highSurrogate(uc);
0156             *qch++ = QChar::lowSurrogate(uc);
0157         }
0158     }
0159     result.truncate(qch - (ushort *)result.unicode());
0160     return result;
0161 }