File indexing completed on 2024-05-19 11:41:33
0001 /* 0002 SPDX-FileCopyrightText: 2013 Szókovács Róbert <szo@szo.hu> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "legacycodec.h" 0008 0009 QByteArray LegacyCodec::encodeFileNameUTF8(const QString &fileName) 0010 { 0011 int len = fileName.length(); 0012 const QChar *uc = fileName.constData(); 0013 0014 uchar replacement = '?'; 0015 int rlen = 3 * len; 0016 int surrogate_high = -1; 0017 0018 QByteArray rstr; 0019 rstr.resize(rlen); 0020 uchar *cursor = (uchar *)rstr.data(); 0021 const QChar *ch = uc; 0022 int invalid = 0; 0023 0024 const QChar *end = ch + len; 0025 while (ch < end) { 0026 uint u = ch->unicode(); 0027 if (surrogate_high >= 0) { 0028 if (ch->isLowSurrogate()) { 0029 u = QChar::surrogateToUcs4(surrogate_high, u); 0030 surrogate_high = -1; 0031 } else { 0032 // high surrogate without low 0033 *cursor = replacement; 0034 ++ch; 0035 ++invalid; 0036 surrogate_high = -1; 0037 continue; 0038 } 0039 } else if (ch->isLowSurrogate()) { 0040 // low surrogate without high 0041 *cursor = replacement; 0042 ++ch; 0043 ++invalid; 0044 continue; 0045 } else if (ch->isHighSurrogate()) { 0046 surrogate_high = u; 0047 ++ch; 0048 continue; 0049 } 0050 0051 if (u >= 0x10FE00 && u <= 0x10FE7F) { 0052 *cursor++ = uchar(u - 0x10FE00 + 128); 0053 } else if (u < 0x80) { 0054 *cursor++ = uchar(u); 0055 } else { 0056 if (u < 0x0800) { 0057 *cursor++ = 0xc0 | uchar(u >> 6); 0058 } else { 0059 // is it one of the Unicode non-characters? 0060 if (QChar::isNonCharacter(u)) { 0061 *cursor++ = replacement; 0062 ++ch; 0063 ++invalid; 0064 continue; 0065 } 0066 0067 if (u > 0xffff) { 0068 *cursor++ = 0xf0 | uchar(u >> 18); 0069 *cursor++ = 0x80 | uchar((u >> 12) & 0x3f); 0070 } else { 0071 *cursor++ = 0xe0 | uchar((u >> 12) & 0x3f); 0072 } 0073 *cursor++ = 0x80 | uchar((u >> 6) & 0x3f); 0074 } 0075 *cursor++ = 0x80 | uchar(u & 0x3f); 0076 } 0077 ++ch; 0078 } 0079 0080 rstr.resize(cursor - (const uchar *)rstr.constData()); 0081 return rstr; 0082 } 0083 0084 QString LegacyCodec::decodeFileNameUTF8(const QByteArray &localFileName) 0085 { 0086 const char *chars = localFileName.constData(); 0087 const int len = localFileName.size(); 0088 int need = 0; 0089 uint uc = 0; 0090 uint min_uc = 0; 0091 0092 QString result(need + 2 * len + 1, Qt::Uninitialized); // worst case 0093 ushort *qch = (ushort *)result.unicode(); 0094 uchar ch; 0095 0096 for (int i = 0; i < len; ++i) { 0097 ch = chars[i]; 0098 if (need) { 0099 if ((ch & 0xc0) == 0x80) { 0100 uc = (uc << 6) | (ch & 0x3f); 0101 --need; 0102 if (!need) { 0103 bool nonCharacter = QChar::isNonCharacter(uc); 0104 if (!nonCharacter && uc > 0xffff && uc < 0x110000) { 0105 // surrogate pair 0106 Q_ASSERT((qch - (ushort *)result.unicode()) + 2 < result.length()); 0107 *qch++ = QChar::highSurrogate(uc); 0108 *qch++ = QChar::lowSurrogate(uc); 0109 } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || nonCharacter || uc >= 0x110000) { 0110 // error: overlong sequence, UTF16 surrogate or non-character 0111 goto error; 0112 } else { 0113 *qch++ = uc; 0114 } 0115 } 0116 } else { 0117 goto error; 0118 } 0119 } else { 0120 if (ch < 128) { 0121 *qch++ = ushort(ch); 0122 } else if ((ch & 0xe0) == 0xc0) { 0123 uc = ch & 0x1f; 0124 need = 1; 0125 min_uc = 0x80; 0126 } else if ((ch & 0xf0) == 0xe0) { 0127 uc = ch & 0x0f; 0128 need = 2; 0129 min_uc = 0x800; 0130 } else if ((ch & 0xf8) == 0xf0) { 0131 uc = ch & 0x07; 0132 need = 3; 0133 min_uc = 0x10000; 0134 } else { 0135 goto error; 0136 } 0137 } 0138 } 0139 if (need > 0) { 0140 // unterminated UTF sequence 0141 goto error; 0142 } 0143 result.truncate(qch - (ushort *)result.unicode()); 0144 return result; 0145 0146 error: 0147 0148 qch = (ushort *)result.unicode(); 0149 for (int i = 0; i < len; ++i) { 0150 ch = chars[i]; 0151 if (ch < 128) { 0152 *qch++ = ushort(ch); 0153 } else { 0154 uint uc = ch - 128 + 0x10FE00; // U+10FE00-U+10FE7F 0155 *qch++ = QChar::highSurrogate(uc); 0156 *qch++ = QChar::lowSurrogate(uc); 0157 } 0158 } 0159 result.truncate(qch - (ushort *)result.unicode()); 0160 return result; 0161 }