Warning, file /pim/kmime/src/kmime_codecs.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /* 0002 kmime_codecs.cpp 0003 0004 KMime, the KDE Internet mail/usenet news message library. 0005 SPDX-FileCopyrightText: 2001 the KMime authors. 0006 See file AUTHORS for details 0007 0008 SPDX-License-Identifier: LGPL-2.0-or-later 0009 */ 0010 0011 #include "kmime_codecs.h" 0012 #include "kmime_debug.h" 0013 0014 #include <QTextCodec> 0015 0016 namespace KMime { 0017 0018 static const char reservedCharacters[] = "\"()<>@,.;:\\[]="; 0019 0020 QByteArray encodeRFC2047String(const QString &src, const QByteArray &charset, 0021 bool addressHeader, bool allow8BitHeaders) 0022 { 0023 QByteArray result; 0024 int start = 0; 0025 int end = 0; 0026 bool nonAscii = false; 0027 bool useQEncoding = false; 0028 0029 // fromLatin1() is safe here, codecForName() uses toLatin1() internally 0030 const QTextCodec *codec = QTextCodec::codecForName(charset); 0031 0032 QByteArray usedCS; 0033 if (!codec) { 0034 //no codec available => try local8Bit and hope the best ;-) 0035 codec = QTextCodec::codecForLocale(); 0036 usedCS = codec->name(); 0037 } else { 0038 if (charset.isEmpty()) { 0039 usedCS = codec->name(); 0040 } else { 0041 usedCS = charset; 0042 } 0043 } 0044 0045 QTextCodec::ConverterState converterState(QTextCodec::IgnoreHeader); 0046 QByteArray encoded8Bit = codec->fromUnicode(src.constData(), src.length(), &converterState); 0047 if (converterState.invalidChars > 0) { 0048 usedCS = "utf-8"; 0049 codec = QTextCodec::codecForName(usedCS); 0050 encoded8Bit = codec->fromUnicode(src); 0051 } 0052 0053 if (usedCS.contains("8859-")) { // use "B"-Encoding for non iso-8859-x charsets 0054 useQEncoding = true; 0055 } 0056 0057 if (allow8BitHeaders) { 0058 return encoded8Bit; 0059 } 0060 0061 int encoded8BitLength = encoded8Bit.length(); 0062 for (int i = 0; i < encoded8BitLength; i++) { 0063 if (encoded8Bit[i] == ' ') { // encoding starts at word boundaries 0064 start = i + 1; 0065 } 0066 0067 // encode escape character, for japanese encodings... 0068 if (((signed char)encoded8Bit[i] < 0) || (encoded8Bit[i] == '\033') || 0069 (addressHeader && (strchr("\"()<>@,.;:\\[]=", encoded8Bit[i]) != nullptr))) { 0070 end = start; // non us-ascii char found, now we determine where to stop encoding 0071 nonAscii = true; 0072 break; 0073 } 0074 } 0075 0076 if (nonAscii) { 0077 while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) { 0078 // we encode complete words 0079 end++; 0080 } 0081 0082 for (int x = end; x < encoded8Bit.length(); x++) { 0083 if (((signed char)encoded8Bit[x] < 0) || (encoded8Bit[x] == '\033') || 0084 (addressHeader && (strchr(reservedCharacters, encoded8Bit[x]) != nullptr))) { 0085 end = x; // we found another non-ascii word 0086 0087 while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) { 0088 // we encode complete words 0089 end++; 0090 } 0091 } 0092 } 0093 0094 result = encoded8Bit.left(start) + "=?" + usedCS; 0095 0096 if (useQEncoding) { 0097 result += "?Q?"; 0098 0099 char c; 0100 char hexcode; // "Q"-encoding implementation described in RFC 2047 0101 for (int i = start; i < end; i++) { 0102 c = encoded8Bit[i]; 0103 if (c == ' ') { // make the result readable with not MIME-capable readers 0104 result += '_'; 0105 } else { 0106 if (((c >= 'a') && (c <= 'z')) || // paranoid mode, encode *all* special chars to avoid problems 0107 ((c >= 'A') && (c <= 'Z')) || // with "From" & "To" headers 0108 ((c >= '0') && (c <= '9'))) { 0109 result += c; 0110 } else { 0111 result += '='; // "stolen" from KMail ;-) 0112 hexcode = ((c & 0xF0) >> 4) + 48; 0113 if (hexcode >= 58) { 0114 hexcode += 7; 0115 } 0116 result += hexcode; 0117 hexcode = (c & 0x0F) + 48; 0118 if (hexcode >= 58) { 0119 hexcode += 7; 0120 } 0121 result += hexcode; 0122 } 0123 } 0124 } 0125 } else { 0126 result += "?B?" + encoded8Bit.mid(start, end - start).toBase64(); 0127 } 0128 0129 result += "?="; 0130 result += encoded8Bit.right(encoded8Bit.length() - end); 0131 } else { 0132 result = encoded8Bit; 0133 } 0134 0135 return result; 0136 } 0137 0138 QByteArray encodeRFC2047Sentence(const QString &src, const QByteArray &charset) 0139 { 0140 QByteArray result; 0141 const QChar *ch = src.constData(); 0142 const int length = src.length(); 0143 int pos = 0; 0144 int wordStart = 0; 0145 0146 //qCDebug(KMIME_LOG) << "Input:" << src; 0147 // Loop over all characters of the string. 0148 // When encountering a split character, RFC-2047-encode the word before it, and add it to the result. 0149 while (pos < length) { 0150 //qCDebug(KMIME_LOG) << "Pos:" << pos << "Result:" << result << "Char:" << ch->toLatin1(); 0151 const bool isAscii = ch->unicode() < 127; 0152 const bool isReserved = (strchr(reservedCharacters, ch->toLatin1()) != nullptr); 0153 if (isAscii && isReserved) { 0154 const int wordSize = pos - wordStart; 0155 if (wordSize > 0) { 0156 const QString word = src.mid(wordStart, wordSize); 0157 result += encodeRFC2047String(word, charset); 0158 } 0159 0160 result += ch->toLatin1(); 0161 wordStart = pos + 1; 0162 } 0163 ch++; 0164 pos++; 0165 } 0166 0167 // Encode the last word 0168 const int wordSize = pos - wordStart; 0169 if (wordSize > 0) { 0170 const QString word = src.mid(wordStart, pos - wordStart); 0171 result += encodeRFC2047String(word, charset); 0172 } 0173 0174 return result; 0175 } 0176 0177 //----------------------------------------------------------------------------- 0178 QByteArray encodeRFC2231String(const QString &str, const QByteArray &charset) 0179 { 0180 if (str.isEmpty()) { 0181 return {}; 0182 } 0183 0184 const QTextCodec *codec = QTextCodec::codecForName(charset); 0185 QByteArray latin; 0186 if (charset == "us-ascii") { 0187 latin = str.toLatin1(); 0188 } else if (codec) { 0189 latin = codec->fromUnicode(str); 0190 } else { 0191 latin = str.toLocal8Bit(); 0192 } 0193 0194 char *l; 0195 for (l = latin.data(); *l; ++l) { 0196 if (((*l & 0xE0) == 0) || (*l & 0x80)) { 0197 // *l is control character or 8-bit char 0198 break; 0199 } 0200 } 0201 if (!*l) { 0202 return latin; 0203 } 0204 0205 QByteArray result = charset + "''"; 0206 for (l = latin.data(); *l; ++l) { 0207 bool needsQuoting = (*l & 0x80) || (*l == '%'); 0208 if (!needsQuoting) { 0209 const QByteArray especials = "()<>@,;:\"/[]?.= \033"; 0210 int len = especials.length(); 0211 for (int i = 0; i < len; i++) { 0212 if (*l == especials[i]) { 0213 needsQuoting = true; 0214 break; 0215 } 0216 } 0217 } 0218 if (needsQuoting) { 0219 result += '%'; 0220 unsigned char hexcode; 0221 hexcode = ((*l & 0xF0) >> 4) + 48; 0222 if (hexcode >= 58) { 0223 hexcode += 7; 0224 } 0225 result += hexcode; 0226 hexcode = (*l & 0x0F) + 48; 0227 if (hexcode >= 58) { 0228 hexcode += 7; 0229 } 0230 result += hexcode; 0231 } else { 0232 result += *l; 0233 } 0234 } 0235 return result; 0236 } 0237 0238 //----------------------------------------------------------------------------- 0239 QString decodeRFC2231String(const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS, 0240 bool forceCS) 0241 { 0242 int p = str.indexOf('\''); 0243 if (p < 0) { 0244 auto codec = QTextCodec::codecForName(defaultCS); 0245 if (!codec) { 0246 codec = QTextCodec::codecForLocale(); 0247 } 0248 return codec->toUnicode(str); 0249 } 0250 0251 QByteArray charset = str.left(p); 0252 0253 QByteArray st = str.mid(str.lastIndexOf('\'') + 1); 0254 0255 char ch; 0256 char ch2; 0257 p = 0; 0258 while (p < st.length()) { 0259 if (st.at(p) == 37) { 0260 // Only try to decode the percent-encoded character if the percent sign 0261 // is really followed by two other characters, see testcase at bug 163024 0262 if (p + 2 < st.length()) { 0263 ch = st.at(p + 1) - 48; 0264 if (ch > 16) { 0265 ch -= 7; 0266 } 0267 ch2 = st.at(p + 2) - 48; 0268 if (ch2 > 16) { 0269 ch2 -= 7; 0270 } 0271 st[p] = ch * 16 + ch2; 0272 st.remove(p + 1, 2); 0273 } 0274 } 0275 p++; 0276 } 0277 qCDebug(KMIME_LOG) << "Got pre-decoded:" << st; 0278 const QTextCodec *charsetcodec = QTextCodec::codecForName(charset); 0279 if (!charsetcodec || forceCS) { 0280 charsetcodec = QTextCodec::codecForName(defaultCS); 0281 } 0282 if (!charsetcodec) { 0283 charsetcodec = QTextCodec::codecForLocale(); 0284 } 0285 0286 usedCS = charsetcodec->name(); 0287 return charsetcodec->toUnicode(st); 0288 } 0289 0290 }