Warning, file /pim/kmime/src/kmime_codecs.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 /*
0002   kmime_codecs.cpp
0003 
0004   KMime, the KDE Internet mail/usenet news message library.
0005   SPDX-FileCopyrightText: 2001 the KMime authors.
0006   See file AUTHORS for details
0007 
0008   SPDX-License-Identifier: LGPL-2.0-or-later
0009 */
0010 
0011 #include "kmime_codecs.h"
0012 #include "kmime_debug.h"
0013 
0014 #include <QTextCodec>
0015 
0016 namespace KMime {
0017 
// Characters that get special treatment in this file: encodeRFC2047String()
// forces encoding of a word containing one of them when addressHeader is set,
// and encodeRFC2047Sentence() splits its input at each of them.
0018 static const char reservedCharacters[] = "\"()<>@,.;:\\[]=";
0019 
0020 QByteArray encodeRFC2047String(const QString &src, const QByteArray &charset,
0021                                bool addressHeader, bool allow8BitHeaders)
0022 {
0023     QByteArray result;
0024     int start = 0;
0025     int end = 0;
0026     bool nonAscii = false;
0027     bool useQEncoding = false;
0028 
0029     // fromLatin1() is safe here, codecForName() uses toLatin1() internally
0030     const QTextCodec *codec = QTextCodec::codecForName(charset);
0031 
0032     QByteArray usedCS;
0033     if (!codec) {
0034         //no codec available => try local8Bit and hope the best ;-)
0035         codec = QTextCodec::codecForLocale();
0036         usedCS = codec->name();
0037     } else {
0038         if (charset.isEmpty()) {
0039             usedCS = codec->name();
0040         } else {
0041             usedCS = charset;
0042         }
0043     }
0044 
0045     QTextCodec::ConverterState converterState(QTextCodec::IgnoreHeader);
0046     QByteArray encoded8Bit = codec->fromUnicode(src.constData(), src.length(), &converterState);
0047     if (converterState.invalidChars > 0) {
0048         usedCS = "utf-8";
0049         codec = QTextCodec::codecForName(usedCS);
0050         encoded8Bit = codec->fromUnicode(src);
0051     }
0052 
0053     if (usedCS.contains("8859-")) {     // use "B"-Encoding for non iso-8859-x charsets
0054         useQEncoding = true;
0055     }
0056 
0057     if (allow8BitHeaders) {
0058         return encoded8Bit;
0059     }
0060 
0061     int encoded8BitLength = encoded8Bit.length();
0062     for (int i = 0; i < encoded8BitLength; i++) {
0063         if (encoded8Bit[i] == ' ') {   // encoding starts at word boundaries
0064             start = i + 1;
0065         }
0066 
0067         // encode escape character, for japanese encodings...
0068         if (((signed char)encoded8Bit[i] < 0) || (encoded8Bit[i] == '\033') ||
0069                 (addressHeader && (strchr("\"()<>@,.;:\\[]=", encoded8Bit[i]) != nullptr))) {
0070             end = start;   // non us-ascii char found, now we determine where to stop encoding
0071             nonAscii = true;
0072             break;
0073         }
0074     }
0075 
0076     if (nonAscii) {
0077         while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) {
0078             // we encode complete words
0079             end++;
0080         }
0081 
0082         for (int x = end; x < encoded8Bit.length(); x++) {
0083             if (((signed char)encoded8Bit[x] < 0) || (encoded8Bit[x] == '\033') ||
0084                     (addressHeader && (strchr(reservedCharacters, encoded8Bit[x]) != nullptr))) {
0085                 end = x;     // we found another non-ascii word
0086 
0087                 while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) {
0088                     // we encode complete words
0089                     end++;
0090                 }
0091             }
0092         }
0093 
0094         result = encoded8Bit.left(start) + "=?" + usedCS;
0095 
0096         if (useQEncoding) {
0097             result += "?Q?";
0098 
0099             char c;
0100             char hexcode; // "Q"-encoding implementation described in RFC 2047
0101             for (int i = start; i < end; i++) {
0102                 c = encoded8Bit[i];
0103                 if (c == ' ') {   // make the result readable with not MIME-capable readers
0104                     result += '_';
0105                 } else {
0106                     if (((c >= 'a') && (c <= 'z')) ||        // paranoid mode, encode *all* special chars to avoid problems
0107                             ((c >= 'A') && (c <= 'Z')) ||        // with "From" & "To" headers
0108                             ((c >= '0') && (c <= '9'))) {
0109                         result += c;
0110                     } else {
0111                         result += '=';                 // "stolen" from KMail ;-)
0112                         hexcode = ((c & 0xF0) >> 4) + 48;
0113                         if (hexcode >= 58) {
0114                             hexcode += 7;
0115                         }
0116                         result += hexcode;
0117                         hexcode = (c & 0x0F) + 48;
0118                         if (hexcode >= 58) {
0119                             hexcode += 7;
0120                         }
0121                         result += hexcode;
0122                     }
0123                 }
0124             }
0125         } else {
0126             result += "?B?" + encoded8Bit.mid(start, end - start).toBase64();
0127         }
0128 
0129         result += "?=";
0130         result += encoded8Bit.right(encoded8Bit.length() - end);
0131     } else {
0132         result = encoded8Bit;
0133     }
0134 
0135     return result;
0136 }
0137 
0138 QByteArray encodeRFC2047Sentence(const QString &src, const QByteArray &charset)
0139 {
0140     QByteArray result;
0141     const QChar *ch = src.constData();
0142     const int length = src.length();
0143     int pos = 0;
0144     int wordStart = 0;
0145 
0146     //qCDebug(KMIME_LOG) << "Input:" << src;
0147     // Loop over all characters of the string.
0148     // When encountering a split character, RFC-2047-encode the word before it, and add it to the result.
0149     while (pos < length) {
0150         //qCDebug(KMIME_LOG) << "Pos:" << pos << "Result:" << result << "Char:" << ch->toLatin1();
0151         const bool isAscii = ch->unicode() < 127;
0152         const bool isReserved = (strchr(reservedCharacters, ch->toLatin1()) != nullptr);
0153         if (isAscii && isReserved) {
0154             const int wordSize = pos - wordStart;
0155             if (wordSize > 0) {
0156                 const QString word = src.mid(wordStart, wordSize);
0157                 result += encodeRFC2047String(word, charset);
0158             }
0159 
0160             result += ch->toLatin1();
0161             wordStart = pos + 1;
0162         }
0163         ch++;
0164         pos++;
0165     }
0166 
0167     // Encode the last word
0168     const int wordSize = pos - wordStart;
0169     if (wordSize > 0) {
0170         const QString word = src.mid(wordStart, pos - wordStart);
0171         result += encodeRFC2047String(word, charset);
0172     }
0173 
0174     return result;
0175 }
0176 
0177 //-----------------------------------------------------------------------------
0178 QByteArray encodeRFC2231String(const QString &str, const QByteArray &charset)
0179 {
0180     if (str.isEmpty()) {
0181       return {};
0182     }
0183 
0184     const QTextCodec *codec = QTextCodec::codecForName(charset);
0185     QByteArray latin;
0186     if (charset == "us-ascii") {
0187         latin = str.toLatin1();
0188     } else if (codec) {
0189         latin = codec->fromUnicode(str);
0190     } else {
0191         latin = str.toLocal8Bit();
0192     }
0193 
0194     char *l;
0195     for (l = latin.data(); *l; ++l) {
0196         if (((*l & 0xE0) == 0) || (*l & 0x80)) {
0197             // *l is control character or 8-bit char
0198             break;
0199         }
0200     }
0201     if (!*l) {
0202         return latin;
0203     }
0204 
0205     QByteArray result = charset + "''";
0206     for (l = latin.data(); *l; ++l) {
0207         bool needsQuoting = (*l & 0x80) || (*l == '%');
0208         if (!needsQuoting) {
0209             const QByteArray especials = "()<>@,;:\"/[]?.= \033";
0210             int len = especials.length();
0211             for (int i = 0; i < len; i++) {
0212                 if (*l == especials[i]) {
0213                     needsQuoting = true;
0214                     break;
0215                 }
0216             }
0217         }
0218         if (needsQuoting) {
0219             result += '%';
0220             unsigned char hexcode;
0221             hexcode = ((*l & 0xF0) >> 4) + 48;
0222             if (hexcode >= 58) {
0223                 hexcode += 7;
0224             }
0225             result += hexcode;
0226             hexcode = (*l & 0x0F) + 48;
0227             if (hexcode >= 58) {
0228                 hexcode += 7;
0229             }
0230             result += hexcode;
0231         } else {
0232             result += *l;
0233         }
0234     }
0235     return result;
0236 }
0237 
0238 //-----------------------------------------------------------------------------
0239 QString decodeRFC2231String(const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS,
0240                             bool forceCS)
0241 {
0242     int p = str.indexOf('\'');
0243     if (p < 0) {
0244         auto codec = QTextCodec::codecForName(defaultCS);
0245         if (!codec) {
0246             codec = QTextCodec::codecForLocale();
0247         }
0248         return codec->toUnicode(str);
0249     }
0250 
0251     QByteArray charset = str.left(p);
0252 
0253     QByteArray st = str.mid(str.lastIndexOf('\'') + 1);
0254 
0255     char ch;
0256     char ch2;
0257     p = 0;
0258     while (p < st.length()) {
0259         if (st.at(p) == 37) {
0260             // Only try to decode the percent-encoded character if the percent sign
0261             // is really followed by two other characters, see testcase at bug 163024
0262             if (p + 2 < st.length()) {
0263                 ch = st.at(p + 1) - 48;
0264                 if (ch > 16) {
0265                     ch -= 7;
0266                 }
0267                 ch2 = st.at(p + 2) - 48;
0268                 if (ch2 > 16) {
0269                     ch2 -= 7;
0270                 }
0271                 st[p] = ch * 16 + ch2;
0272                 st.remove(p + 1, 2);
0273             }
0274         }
0275         p++;
0276     }
0277     qCDebug(KMIME_LOG) << "Got pre-decoded:" << st;
0278     const QTextCodec *charsetcodec = QTextCodec::codecForName(charset);
0279     if (!charsetcodec || forceCS) {
0280         charsetcodec = QTextCodec::codecForName(defaultCS);
0281     }
0282     if (!charsetcodec) {
0283         charsetcodec = QTextCodec::codecForLocale();
0284     }
0285 
0286     usedCS = charsetcodec->name();
0287     return charsetcodec->toUnicode(st);
0288 }
0289 
0290 }