Warning, file /pim/kmime/src/kmime_codecs.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 /*
0002   kmime_codecs.cpp
0003 
0004   KMime, the KDE Internet mail/usenet news message library.
0005   SPDX-FileCopyrightText: 2001 the KMime authors.
0006   See file AUTHORS for details
0007 
0008   SPDX-License-Identifier: LGPL-2.0-or-later
0009 */
0010 
0011 #include "kmime_codecs.h"
0012 #include "kmime_debug.h"
0013 
0014 #include <QTextCodec>
0015 
0016 namespace KMime {
0017 
// ASCII characters treated as "reserved" by the RFC 2047 encoders in this file:
// encodeRFC2047String() forces encoding of a word containing one of them when
// encoding an address header, and encodeRFC2047Sentence() splits its input at
// them and copies them to the output unencoded.
static constexpr char reservedCharacters[] = "\"()<>@,.;:\\[]=";
0019 
0020 QByteArray encodeRFC2047String(const QString &src, const QByteArray &charset,
0021                                bool addressHeader, bool allow8BitHeaders)
0022 {
0023     QByteArray result;
0024     int start = 0;
0025     int end = 0;
0026     bool nonAscii = false;
0027     bool useQEncoding = false;
0028 
0029     // fromLatin1() is safe here, codecForName() uses toLatin1() internally
0030     const QTextCodec *codec = QTextCodec::codecForName(charset);
0031 
0032     QByteArray usedCS;
0033     if (!codec) {
0034         //no codec available => try local8Bit and hope the best ;-)
0035         codec = QTextCodec::codecForLocale();
0036         usedCS = codec->name();
0037     } else {
0038         if (charset.isEmpty()) {
0039             usedCS = codec->name();
0040         } else {
0041             usedCS = charset;
0042         }
0043     }
0044 
0045     QTextCodec::ConverterState converterState(QTextCodec::IgnoreHeader);
0046     QByteArray encoded8Bit = codec->fromUnicode(src.constData(), src.length(), &converterState);
0047     if (converterState.invalidChars > 0) {
0048         usedCS = "utf-8";
0049         codec = QTextCodec::codecForName(usedCS);
0050         encoded8Bit = codec->fromUnicode(src);
0051     }
0052 
0053     if (usedCS.contains("8859-")) {     // use "B"-Encoding for non iso-8859-x charsets
0054         useQEncoding = true;
0055     }
0056 
0057     if (allow8BitHeaders) {
0058         return encoded8Bit;
0059     }
0060 
0061     int encoded8BitLength = encoded8Bit.length();
0062     for (int i = 0; i < encoded8BitLength; i++) {
0063         if (encoded8Bit[i] == ' ') {   // encoding starts at word boundaries
0064             start = i + 1;
0065         }
0066 
0067         // encode escape character, for japanese encodings...
0068         if (((signed char)encoded8Bit[i] < 0) || (encoded8Bit[i] == '\033') ||
0069                 (addressHeader && (strchr("\"()<>@,.;:\\[]=", encoded8Bit[i]) != nullptr))) {
0070             end = start;   // non us-ascii char found, now we determine where to stop encoding
0071             nonAscii = true;
0072             break;
0073         }
0074     }
0075 
0076     if (nonAscii) {
0077         while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) {
0078             // we encode complete words
0079             end++;
0080         }
0081 
0082         for (int x = end; x < encoded8Bit.length(); x++) {
0083             if (((signed char)encoded8Bit[x] < 0) || (encoded8Bit[x] == '\033') ||
0084                     (addressHeader && (strchr(reservedCharacters, encoded8Bit[x]) != nullptr))) {
0085                 end = x;     // we found another non-ascii word
0086 
0087                 while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) {
0088                     // we encode complete words
0089                     end++;
0090                 }
0091             }
0092         }
0093 
0094         result = encoded8Bit.left(start) + "=?" + usedCS;
0095 
0096         if (useQEncoding) {
0097             result += "?Q?";
0098 
0099             char hexcode; // "Q"-encoding implementation described in RFC 2047
0100             for (int i = start; i < end; i++) {
0101                 char c = encoded8Bit[i];
0102                 if (c == ' ') {   // make the result readable with not MIME-capable readers
0103                     result += '_';
0104                 } else {
0105                     if (((c >= 'a') && (c <= 'z')) ||        // paranoid mode, encode *all* special chars to avoid problems
0106                             ((c >= 'A') && (c <= 'Z')) ||        // with "From" & "To" headers
0107                             ((c >= '0') && (c <= '9'))) {
0108                         result += c;
0109                     } else {
0110                         result += '=';                 // "stolen" from KMail ;-)
0111                         hexcode = ((c & 0xF0) >> 4) + 48;
0112                         if (hexcode >= 58) {
0113                             hexcode += 7;
0114                         }
0115                         result += hexcode;
0116                         hexcode = (c & 0x0F) + 48;
0117                         if (hexcode >= 58) {
0118                             hexcode += 7;
0119                         }
0120                         result += hexcode;
0121                     }
0122                 }
0123             }
0124         } else {
0125             result += "?B?" + encoded8Bit.mid(start, end - start).toBase64();
0126         }
0127 
0128         result += "?=";
0129         result += encoded8Bit.right(encoded8Bit.length() - end);
0130     } else {
0131         result = encoded8Bit;
0132     }
0133 
0134     return result;
0135 }
0136 
0137 QByteArray encodeRFC2047Sentence(const QString &src, const QByteArray &charset)
0138 {
0139     QByteArray result;
0140     const QChar *ch = src.constData();
0141     const int length = src.length();
0142     int pos = 0;
0143     int wordStart = 0;
0144 
0145     //qCDebug(KMIME_LOG) << "Input:" << src;
0146     // Loop over all characters of the string.
0147     // When encountering a split character, RFC-2047-encode the word before it, and add it to the result.
0148     while (pos < length) {
0149         //qCDebug(KMIME_LOG) << "Pos:" << pos << "Result:" << result << "Char:" << ch->toLatin1();
0150         const bool isAscii = ch->unicode() < 127;
0151         const bool isReserved = (strchr(reservedCharacters, ch->toLatin1()) != nullptr);
0152         if (isAscii && isReserved) {
0153             const int wordSize = pos - wordStart;
0154             if (wordSize > 0) {
0155                 const QString word = src.mid(wordStart, wordSize);
0156                 result += encodeRFC2047String(word, charset);
0157             }
0158 
0159             result += ch->toLatin1();
0160             wordStart = pos + 1;
0161         }
0162         ch++;
0163         pos++;
0164     }
0165 
0166     // Encode the last word
0167     const int wordSize = pos - wordStart;
0168     if (wordSize > 0) {
0169         const QString word = src.mid(wordStart, pos - wordStart);
0170         result += encodeRFC2047String(word, charset);
0171     }
0172 
0173     return result;
0174 }
0175 
0176 //-----------------------------------------------------------------------------
0177 QByteArray encodeRFC2231String(const QString &str, const QByteArray &charset)
0178 {
0179     if (str.isEmpty()) {
0180       return {};
0181     }
0182 
0183     const QTextCodec *codec = QTextCodec::codecForName(charset);
0184     QByteArray latin;
0185     if (charset == "us-ascii") {
0186         latin = str.toLatin1();
0187     } else if (codec) {
0188         latin = codec->fromUnicode(str);
0189     } else {
0190         latin = str.toLocal8Bit();
0191     }
0192 
0193     char *l;
0194     for (l = latin.data(); *l; ++l) {
0195         if (((*l & 0xE0) == 0) || (*l & 0x80)) {
0196             // *l is control character or 8-bit char
0197             break;
0198         }
0199     }
0200     if (!*l) {
0201         return latin;
0202     }
0203 
0204     QByteArray result = charset + "''";
0205     for (l = latin.data(); *l; ++l) {
0206         bool needsQuoting = (*l & 0x80) || (*l == '%');
0207         if (!needsQuoting) {
0208             const QByteArray especials = "()<>@,;:\"/[]?.= \033";
0209             int len = especials.length();
0210             for (int i = 0; i < len; i++) {
0211                 if (*l == especials[i]) {
0212                     needsQuoting = true;
0213                     break;
0214                 }
0215             }
0216         }
0217         if (needsQuoting) {
0218             result += '%';
0219             unsigned char hexcode;
0220             hexcode = ((*l & 0xF0) >> 4) + 48;
0221             if (hexcode >= 58) {
0222                 hexcode += 7;
0223             }
0224             result += hexcode;
0225             hexcode = (*l & 0x0F) + 48;
0226             if (hexcode >= 58) {
0227                 hexcode += 7;
0228             }
0229             result += hexcode;
0230         } else {
0231             result += *l;
0232         }
0233     }
0234     return result;
0235 }
0236 
0237 //-----------------------------------------------------------------------------
0238 QString decodeRFC2231String(const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS,
0239                             bool forceCS)
0240 {
0241     int p = str.indexOf('\'');
0242     if (p < 0) {
0243         auto codec = QTextCodec::codecForName(defaultCS);
0244         if (!codec) {
0245             codec = QTextCodec::codecForLocale();
0246         }
0247         return codec->toUnicode(str);
0248     }
0249 
0250     QByteArray charset = str.left(p);
0251 
0252     QByteArray st = str.mid(str.lastIndexOf('\'') + 1);
0253 
0254     char ch;
0255     char ch2;
0256     p = 0;
0257     while (p < st.length()) {
0258         if (st.at(p) == 37) {
0259             // Only try to decode the percent-encoded character if the percent sign
0260             // is really followed by two other characters, see testcase at bug 163024
0261             if (p + 2 < st.length()) {
0262                 ch = st.at(p + 1) - 48;
0263                 if (ch > 16) {
0264                     ch -= 7;
0265                 }
0266                 ch2 = st.at(p + 2) - 48;
0267                 if (ch2 > 16) {
0268                     ch2 -= 7;
0269                 }
0270                 st[p] = ch * 16 + ch2;
0271                 st.remove(p + 1, 2);
0272             }
0273         }
0274         p++;
0275     }
0276     qCDebug(KMIME_LOG) << "Got pre-decoded:" << st;
0277     const QTextCodec *charsetcodec = QTextCodec::codecForName(charset);
0278     if (!charsetcodec || forceCS) {
0279         charsetcodec = QTextCodec::codecForName(defaultCS);
0280     }
0281     if (!charsetcodec) {
0282         charsetcodec = QTextCodec::codecForLocale();
0283     }
0284 
0285     usedCS = charsetcodec->name();
0286     return charsetcodec->toUnicode(st);
0287 }
0288 
0289 }