Warning, file /pim/kmime/src/kmime_codecs.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /* 0002 kmime_codecs.cpp 0003 0004 KMime, the KDE Internet mail/usenet news message library. 0005 SPDX-FileCopyrightText: 2001 the KMime authors. 0006 See file AUTHORS for details 0007 0008 SPDX-License-Identifier: LGPL-2.0-or-later 0009 */ 0010 0011 #include "kmime_codecs.h" 0012 #include "kmime_debug.h" 0013 0014 #include <QTextCodec> 0015 0016 namespace KMime { 0017 0018 static const char reservedCharacters[] = "\"()<>@,.;:\\[]="; 0019 0020 QByteArray encodeRFC2047String(const QString &src, const QByteArray &charset, 0021 bool addressHeader, bool allow8BitHeaders) 0022 { 0023 QByteArray result; 0024 int start = 0; 0025 int end = 0; 0026 bool nonAscii = false; 0027 bool useQEncoding = false; 0028 0029 // fromLatin1() is safe here, codecForName() uses toLatin1() internally 0030 const QTextCodec *codec = QTextCodec::codecForName(charset); 0031 0032 QByteArray usedCS; 0033 if (!codec) { 0034 //no codec available => try local8Bit and hope the best ;-) 0035 codec = QTextCodec::codecForLocale(); 0036 usedCS = codec->name(); 0037 } else { 0038 if (charset.isEmpty()) { 0039 usedCS = codec->name(); 0040 } else { 0041 usedCS = charset; 0042 } 0043 } 0044 0045 QTextCodec::ConverterState converterState(QTextCodec::IgnoreHeader); 0046 QByteArray encoded8Bit = codec->fromUnicode(src.constData(), src.length(), &converterState); 0047 if (converterState.invalidChars > 0) { 0048 usedCS = "utf-8"; 0049 codec = QTextCodec::codecForName(usedCS); 0050 encoded8Bit = codec->fromUnicode(src); 0051 } 0052 0053 if (usedCS.contains("8859-")) { // use "B"-Encoding for non iso-8859-x charsets 0054 useQEncoding = true; 0055 } 0056 0057 if (allow8BitHeaders) { 0058 return encoded8Bit; 0059 } 0060 0061 int encoded8BitLength = encoded8Bit.length(); 0062 for (int i = 0; i < encoded8BitLength; i++) { 0063 if (encoded8Bit[i] == ' ') { // encoding starts at word boundaries 0064 start = i + 1; 0065 } 0066 0067 // encode escape character, for japanese encodings... 0068 if (((signed char)encoded8Bit[i] < 0) || (encoded8Bit[i] == '\033') || 0069 (addressHeader && (strchr("\"()<>@,.;:\\[]=", encoded8Bit[i]) != nullptr))) { 0070 end = start; // non us-ascii char found, now we determine where to stop encoding 0071 nonAscii = true; 0072 break; 0073 } 0074 } 0075 0076 if (nonAscii) { 0077 while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) { 0078 // we encode complete words 0079 end++; 0080 } 0081 0082 for (int x = end; x < encoded8Bit.length(); x++) { 0083 if (((signed char)encoded8Bit[x] < 0) || (encoded8Bit[x] == '\033') || 0084 (addressHeader && (strchr(reservedCharacters, encoded8Bit[x]) != nullptr))) { 0085 end = x; // we found another non-ascii word 0086 0087 while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) { 0088 // we encode complete words 0089 end++; 0090 } 0091 } 0092 } 0093 0094 result = encoded8Bit.left(start) + "=?" + usedCS; 0095 0096 if (useQEncoding) { 0097 result += "?Q?"; 0098 0099 char hexcode; // "Q"-encoding implementation described in RFC 2047 0100 for (int i = start; i < end; i++) { 0101 char c = encoded8Bit[i]; 0102 if (c == ' ') { // make the result readable with not MIME-capable readers 0103 result += '_'; 0104 } else { 0105 if (((c >= 'a') && (c <= 'z')) || // paranoid mode, encode *all* special chars to avoid problems 0106 ((c >= 'A') && (c <= 'Z')) || // with "From" & "To" headers 0107 ((c >= '0') && (c <= '9'))) { 0108 result += c; 0109 } else { 0110 result += '='; // "stolen" from KMail ;-) 0111 hexcode = ((c & 0xF0) >> 4) + 48; 0112 if (hexcode >= 58) { 0113 hexcode += 7; 0114 } 0115 result += hexcode; 0116 hexcode = (c & 0x0F) + 48; 0117 if (hexcode >= 58) { 0118 hexcode += 7; 0119 } 0120 result += hexcode; 0121 } 0122 } 0123 } 0124 } else { 0125 result += "?B?" + encoded8Bit.mid(start, end - start).toBase64(); 0126 } 0127 0128 result += "?="; 0129 result += encoded8Bit.right(encoded8Bit.length() - end); 0130 } else { 0131 result = encoded8Bit; 0132 } 0133 0134 return result; 0135 } 0136 0137 QByteArray encodeRFC2047Sentence(const QString &src, const QByteArray &charset) 0138 { 0139 QByteArray result; 0140 const QChar *ch = src.constData(); 0141 const int length = src.length(); 0142 int pos = 0; 0143 int wordStart = 0; 0144 0145 //qCDebug(KMIME_LOG) << "Input:" << src; 0146 // Loop over all characters of the string. 0147 // When encountering a split character, RFC-2047-encode the word before it, and add it to the result. 0148 while (pos < length) { 0149 //qCDebug(KMIME_LOG) << "Pos:" << pos << "Result:" << result << "Char:" << ch->toLatin1(); 0150 const bool isAscii = ch->unicode() < 127; 0151 const bool isReserved = (strchr(reservedCharacters, ch->toLatin1()) != nullptr); 0152 if (isAscii && isReserved) { 0153 const int wordSize = pos - wordStart; 0154 if (wordSize > 0) { 0155 const QString word = src.mid(wordStart, wordSize); 0156 result += encodeRFC2047String(word, charset); 0157 } 0158 0159 result += ch->toLatin1(); 0160 wordStart = pos + 1; 0161 } 0162 ch++; 0163 pos++; 0164 } 0165 0166 // Encode the last word 0167 const int wordSize = pos - wordStart; 0168 if (wordSize > 0) { 0169 const QString word = src.mid(wordStart, pos - wordStart); 0170 result += encodeRFC2047String(word, charset); 0171 } 0172 0173 return result; 0174 } 0175 0176 //----------------------------------------------------------------------------- 0177 QByteArray encodeRFC2231String(const QString &str, const QByteArray &charset) 0178 { 0179 if (str.isEmpty()) { 0180 return {}; 0181 } 0182 0183 const QTextCodec *codec = QTextCodec::codecForName(charset); 0184 QByteArray latin; 0185 if (charset == "us-ascii") { 0186 latin = str.toLatin1(); 0187 } else if (codec) { 0188 latin = codec->fromUnicode(str); 0189 } else { 0190 latin = str.toLocal8Bit(); 0191 } 0192 0193 char *l; 0194 for (l = latin.data(); *l; ++l) { 0195 if (((*l & 0xE0) == 0) || (*l & 0x80)) { 0196 // *l is control character or 8-bit char 0197 break; 0198 } 0199 } 0200 if (!*l) { 0201 return latin; 0202 } 0203 0204 QByteArray result = charset + "''"; 0205 for (l = latin.data(); *l; ++l) { 0206 bool needsQuoting = (*l & 0x80) || (*l == '%'); 0207 if (!needsQuoting) { 0208 const QByteArray especials = "()<>@,;:\"/[]?.= \033"; 0209 int len = especials.length(); 0210 for (int i = 0; i < len; i++) { 0211 if (*l == especials[i]) { 0212 needsQuoting = true; 0213 break; 0214 } 0215 } 0216 } 0217 if (needsQuoting) { 0218 result += '%'; 0219 unsigned char hexcode; 0220 hexcode = ((*l & 0xF0) >> 4) + 48; 0221 if (hexcode >= 58) { 0222 hexcode += 7; 0223 } 0224 result += hexcode; 0225 hexcode = (*l & 0x0F) + 48; 0226 if (hexcode >= 58) { 0227 hexcode += 7; 0228 } 0229 result += hexcode; 0230 } else { 0231 result += *l; 0232 } 0233 } 0234 return result; 0235 } 0236 0237 //----------------------------------------------------------------------------- 0238 QString decodeRFC2231String(const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS, 0239 bool forceCS) 0240 { 0241 int p = str.indexOf('\''); 0242 if (p < 0) { 0243 auto codec = QTextCodec::codecForName(defaultCS); 0244 if (!codec) { 0245 codec = QTextCodec::codecForLocale(); 0246 } 0247 return codec->toUnicode(str); 0248 } 0249 0250 QByteArray charset = str.left(p); 0251 0252 QByteArray st = str.mid(str.lastIndexOf('\'') + 1); 0253 0254 char ch; 0255 char ch2; 0256 p = 0; 0257 while (p < st.length()) { 0258 if (st.at(p) == 37) { 0259 // Only try to decode the percent-encoded character if the percent sign 0260 // is really followed by two other characters, see testcase at bug 163024 0261 if (p + 2 < st.length()) { 0262 ch = st.at(p + 1) - 48; 0263 if (ch > 16) { 0264 ch -= 7; 0265 } 0266 ch2 = st.at(p + 2) - 48; 0267 if (ch2 > 16) { 0268 ch2 -= 7; 0269 } 0270 st[p] = ch * 16 + ch2; 0271 st.remove(p + 1, 2); 0272 } 0273 } 0274 p++; 0275 } 0276 qCDebug(KMIME_LOG) << "Got pre-decoded:" << st; 0277 const QTextCodec *charsetcodec = QTextCodec::codecForName(charset); 0278 if (!charsetcodec || forceCS) { 0279 charsetcodec = QTextCodec::codecForName(defaultCS); 0280 } 0281 if (!charsetcodec) { 0282 charsetcodec = QTextCodec::codecForLocale(); 0283 } 0284 0285 usedCS = charsetcodec->name(); 0286 return charsetcodec->toUnicode(st); 0287 } 0288 0289 }