File indexing completed on 2024-04-21 05:18:10
0001 /* 0002 kmime_codecs.cpp 0003 0004 KMime, the KDE Internet mail/usenet news message library. 0005 SPDX-FileCopyrightText: 2001 the KMime authors. 0006 See file AUTHORS for details 0007 0008 SPDX-License-Identifier: LGPL-2.0-or-later 0009 */ 0010 0011 #include "kmime_codecs_p.h" 0012 #include "kmime_debug.h" 0013 0014 #include <QStringDecoder> 0015 #include <QStringEncoder> 0016 0017 namespace KMime { 0018 0019 static const char reservedCharacters[] = "\"()<>@,.;:\\[]="; 0020 0021 QByteArray encodeRFC2047String(QStringView src, const QByteArray &charset, 0022 bool addressHeader) 0023 { 0024 QByteArray result; 0025 int start = 0; 0026 int end = 0; 0027 bool nonAscii = false; 0028 bool useQEncoding = false; 0029 0030 // fromLatin1() is safe here, codecForName() uses toLatin1() internally 0031 QStringEncoder codec(charset.constData()); 0032 0033 QByteArray usedCS; 0034 if (!codec.isValid()) { 0035 //no codec available => try local8Bit and hope the best ;-) 0036 codec = QStringEncoder(QStringEncoder::System); 0037 usedCS = codec.name(); 0038 } else { 0039 if (charset.isEmpty()) { 0040 usedCS = codec.name(); 0041 } else { 0042 usedCS = charset; 0043 } 0044 } 0045 0046 QByteArray encoded8Bit = codec.encode(src); 0047 if (codec.hasError()) { 0048 usedCS = "utf-8"; 0049 codec = QStringEncoder(usedCS.constData()); 0050 encoded8Bit = codec.encode(src); 0051 } 0052 0053 if (usedCS.contains("8859-")) { // use "B"-Encoding for non iso-8859-x charsets 0054 useQEncoding = true; 0055 } 0056 0057 int encoded8BitLength = encoded8Bit.length(); 0058 for (int i = 0; i < encoded8BitLength; i++) { 0059 if (encoded8Bit[i] == ' ') { // encoding starts at word boundaries 0060 start = i + 1; 0061 } 0062 0063 // encode escape character, for japanese encodings... 0064 if (((signed char)encoded8Bit[i] < 0) || (encoded8Bit[i] == '\033') || 0065 (addressHeader && (strchr("\"()<>@,.;:\\[]=", encoded8Bit[i]) != nullptr))) { 0066 end = start; // non us-ascii char found, now we determine where to stop encoding 0067 nonAscii = true; 0068 break; 0069 } 0070 } 0071 0072 if (nonAscii) { 0073 while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) { 0074 // we encode complete words 0075 end++; 0076 } 0077 0078 for (int x = end; x < encoded8Bit.length(); x++) { 0079 if (((signed char)encoded8Bit[x] < 0) || (encoded8Bit[x] == '\033') || 0080 (addressHeader && (strchr(reservedCharacters, encoded8Bit[x]) != nullptr))) { 0081 end = x; // we found another non-ascii word 0082 0083 while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) { 0084 // we encode complete words 0085 end++; 0086 } 0087 } 0088 } 0089 0090 result = encoded8Bit.left(start) + "=?" + usedCS; 0091 0092 if (useQEncoding) { 0093 result += "?Q?"; 0094 0095 char hexcode; // "Q"-encoding implementation described in RFC 2047 0096 for (int i = start; i < end; i++) { 0097 char c = encoded8Bit[i]; 0098 if (c == ' ') { // make the result readable with not MIME-capable readers 0099 result += '_'; 0100 } else { 0101 if (((c >= 'a') && (c <= 'z')) || // paranoid mode, encode *all* special chars to avoid problems 0102 ((c >= 'A') && (c <= 'Z')) || // with "From" & "To" headers 0103 ((c >= '0') && (c <= '9'))) { 0104 result += c; 0105 } else { 0106 result += '='; // "stolen" from KMail ;-) 0107 hexcode = ((c & 0xF0) >> 4) + 48; 0108 if (hexcode >= 58) { 0109 hexcode += 7; 0110 } 0111 result += hexcode; 0112 hexcode = (c & 0x0F) + 48; 0113 if (hexcode >= 58) { 0114 hexcode += 7; 0115 } 0116 result += hexcode; 0117 } 0118 } 0119 } 0120 } else { 0121 result += "?B?" + encoded8Bit.mid(start, end - start).toBase64(); 0122 } 0123 0124 result += "?="; 0125 result += encoded8Bit.right(encoded8Bit.length() - end); 0126 } else { 0127 result = encoded8Bit; 0128 } 0129 0130 return result; 0131 } 0132 0133 QByteArray encodeRFC2047Sentence(QStringView src, const QByteArray &charset) 0134 { 0135 QByteArray result; 0136 const QChar *ch = src.constData(); 0137 const int length = src.length(); 0138 int pos = 0; 0139 int wordStart = 0; 0140 0141 //qCDebug(KMIME_LOG) << "Input:" << src; 0142 // Loop over all characters of the string. 0143 // When encountering a split character, RFC-2047-encode the word before it, and add it to the result. 0144 while (pos < length) { 0145 //qCDebug(KMIME_LOG) << "Pos:" << pos << "Result:" << result << "Char:" << ch->toLatin1(); 0146 const bool isAscii = ch->unicode() < 127; 0147 const bool isReserved = (strchr(reservedCharacters, ch->toLatin1()) != nullptr); 0148 if (isAscii && isReserved) { 0149 const int wordSize = pos - wordStart; 0150 if (wordSize > 0) { 0151 const auto word = src.mid(wordStart, wordSize); 0152 result += encodeRFC2047String(word, charset); 0153 } 0154 0155 result += ch->toLatin1(); 0156 wordStart = pos + 1; 0157 } 0158 ch++; 0159 pos++; 0160 } 0161 0162 // Encode the last word 0163 const int wordSize = pos - wordStart; 0164 if (wordSize > 0) { 0165 const auto word = src.mid(wordStart, pos - wordStart); 0166 result += encodeRFC2047String(word, charset); 0167 } 0168 0169 return result; 0170 } 0171 0172 //----------------------------------------------------------------------------- 0173 QByteArray encodeRFC2231String(QStringView str, const QByteArray &charset) 0174 { 0175 if (str.isEmpty()) { 0176 return {}; 0177 } 0178 0179 QStringEncoder codec(charset.constData()); 0180 QByteArray latin; 0181 if (charset == "us-ascii") { 0182 latin = str.toLatin1(); 0183 } else if (codec.isValid()) { 0184 latin = codec.encode(str); 0185 } else { 0186 latin = str.toLocal8Bit(); 0187 } 0188 0189 char *l; 0190 for (l = latin.data(); *l; ++l) { 0191 if (((*l & 0xE0) == 0) || (*l & 0x80)) { 0192 // *l is control character or 8-bit char 0193 break; 0194 } 0195 } 0196 if (!*l) { 0197 return latin; 0198 } 0199 0200 QByteArray result = charset + "''"; 0201 for (l = latin.data(); *l; ++l) { 0202 bool needsQuoting = (*l & 0x80) || (*l == '%'); 0203 if (!needsQuoting) { 0204 const QByteArray especials = "()<>@,;:\"/[]?.= \033"; 0205 int len = especials.length(); 0206 for (int i = 0; i < len; i++) { 0207 if (*l == especials[i]) { 0208 needsQuoting = true; 0209 break; 0210 } 0211 } 0212 } 0213 if (needsQuoting) { 0214 result += '%'; 0215 unsigned char hexcode; 0216 hexcode = ((*l & 0xF0) >> 4) + 48; 0217 if (hexcode >= 58) { 0218 hexcode += 7; 0219 } 0220 result += hexcode; 0221 hexcode = (*l & 0x0F) + 48; 0222 if (hexcode >= 58) { 0223 hexcode += 7; 0224 } 0225 result += hexcode; 0226 } else { 0227 result += *l; 0228 } 0229 } 0230 return result; 0231 } 0232 0233 }