kmime/src/kmime_codecs.cpp

0001 /*
0002   kmime_codecs.cpp
0003
0004   KMime, the KDE Internet mail/usenet news message library.
0005   SPDX-FileCopyrightText: 2001 the KMime authors.
0006   See file AUTHORS for details
0007
0008   SPDX-License-Identifier: LGPL-2.0-or-later
0009 */
0010
0011 #include "kmime_codecs_p.h"
0012 #include "kmime_debug.h"
0013
0014 #include <QStringDecoder>
0015 #include <QStringEncoder>
0016
0017 namespace KMime {
0018
// Characters given special treatment by the RFC 2047 helpers in this file:
// encodeRFC2047String() forces encoding of words containing one of them when
// asked to handle an address header, and encodeRFC2047Sentence() splits its
// input at them and copies them through unencoded.
static constexpr char reservedCharacters[] = R"("()<>@,.;:\[]=)";
0020
0021 QByteArray encodeRFC2047String(QStringView src, const QByteArray &charset,
0022                                bool addressHeader)
0023 {
0024     QByteArray result;
0025     int start = 0;
0026     int end = 0;
0027     bool nonAscii = false;
0028     bool useQEncoding = false;
0029
0030     // fromLatin1() is safe here, codecForName() uses toLatin1() internally
0031     QStringEncoder codec(charset.constData());
0032
0033     QByteArray usedCS;
0034     if (!codec.isValid()) {
0035         //no codec available => try local8Bit and hope the best ;-)
0036         codec = QStringEncoder(QStringEncoder::System);
0037         usedCS = codec.name();
0038     } else {
0039         if (charset.isEmpty()) {
0040             usedCS = codec.name();
0041         } else {
0042             usedCS = charset;
0043         }
0044     }
0045
0046     QByteArray encoded8Bit = codec.encode(src);
0047     if (codec.hasError()) {
0048         usedCS = "utf-8";
0049         codec = QStringEncoder(usedCS.constData());
0050         encoded8Bit = codec.encode(src);
0051     }
0052
0053     if (usedCS.contains("8859-")) {     // use "B"-Encoding for non iso-8859-x charsets
0054         useQEncoding = true;
0055     }
0056
0057     int encoded8BitLength = encoded8Bit.length();
0058     for (int i = 0; i < encoded8BitLength; i++) {
0059         if (encoded8Bit[i] == ' ') {   // encoding starts at word boundaries
0060             start = i + 1;
0061         }
0062
0063         // encode escape character, for japanese encodings...
0064         if (((signed char)encoded8Bit[i] < 0) || (encoded8Bit[i] == '\033') ||
0065                 (addressHeader && (strchr("\"()<>@,.;:\\[]=", encoded8Bit[i]) != nullptr))) {
0066             end = start;   // non us-ascii char found, now we determine where to stop encoding
0067             nonAscii = true;
0068             break;
0069         }
0070     }
0071
0072     if (nonAscii) {
0073         while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) {
0074             // we encode complete words
0075             end++;
0076         }
0077
0078         for (int x = end; x < encoded8Bit.length(); x++) {
0079             if (((signed char)encoded8Bit[x] < 0) || (encoded8Bit[x] == '\033') ||
0080                     (addressHeader && (strchr(reservedCharacters, encoded8Bit[x]) != nullptr))) {
0081                 end = x;     // we found another non-ascii word
0082
0083                 while ((end < encoded8Bit.length()) && (encoded8Bit[end] != ' ')) {
0084                     // we encode complete words
0085                     end++;
0086                 }
0087             }
0088         }
0089
0090         result = encoded8Bit.left(start) + "=?" + usedCS;
0091
0092         if (useQEncoding) {
0093             result += "?Q?";
0094
0095             char hexcode; // "Q"-encoding implementation described in RFC 2047
0096             for (int i = start; i < end; i++) {
0097                 char c = encoded8Bit[i];
0098                 if (c == ' ') {   // make the result readable with not MIME-capable readers
0099                     result += '_';
0100                 } else {
0101                     if (((c >= 'a') && (c <= 'z')) ||        // paranoid mode, encode *all* special chars to avoid problems
0102                             ((c >= 'A') && (c <= 'Z')) ||        // with "From" & "To" headers
0103                             ((c >= '0') && (c <= '9'))) {
0104                         result += c;
0105                     } else {
0106                         result += '=';                 // "stolen" from KMail ;-)
0107                         hexcode = ((c & 0xF0) >> 4) + 48;
0108                         if (hexcode >= 58) {
0109                             hexcode += 7;
0110                         }
0111                         result += hexcode;
0112                         hexcode = (c & 0x0F) + 48;
0113                         if (hexcode >= 58) {
0114                             hexcode += 7;
0115                         }
0116                         result += hexcode;
0117                     }
0118                 }
0119             }
0120         } else {
0121             result += "?B?" + encoded8Bit.mid(start, end - start).toBase64();
0122         }
0123
0124         result += "?=";
0125         result += encoded8Bit.right(encoded8Bit.length() - end);
0126     } else {
0127         result = encoded8Bit;
0128     }
0129
0130     return result;
0131 }
0132
0133 QByteArray encodeRFC2047Sentence(QStringView src, const QByteArray &charset)
0134 {
0135     QByteArray result;
0136     const QChar *ch = src.constData();
0137     const int length = src.length();
0138     int pos = 0;
0139     int wordStart = 0;
0140
0141     //qCDebug(KMIME_LOG) << "Input:" << src;
0142     // Loop over all characters of the string.
0143     // When encountering a split character, RFC-2047-encode the word before it, and add it to the result.
0144     while (pos < length) {
0145         //qCDebug(KMIME_LOG) << "Pos:" << pos << "Result:" << result << "Char:" << ch->toLatin1();
0146         const bool isAscii = ch->unicode() < 127;
0147         const bool isReserved = (strchr(reservedCharacters, ch->toLatin1()) != nullptr);
0148         if (isAscii && isReserved) {
0149             const int wordSize = pos - wordStart;
0150             if (wordSize > 0) {
0151                 const auto word = src.mid(wordStart, wordSize);
0152                 result += encodeRFC2047String(word, charset);
0153             }
0154
0155             result += ch->toLatin1();
0156             wordStart = pos + 1;
0157         }
0158         ch++;
0159         pos++;
0160     }
0161
0162     // Encode the last word
0163     const int wordSize = pos - wordStart;
0164     if (wordSize > 0) {
0165         const auto word = src.mid(wordStart, pos - wordStart);
0166         result += encodeRFC2047String(word, charset);
0167     }
0168
0169     return result;
0170 }
0171
0172 //-----------------------------------------------------------------------------
0173 QByteArray encodeRFC2231String(QStringView str, const QByteArray &charset)
0174 {
0175     if (str.isEmpty()) {
0176       return {};
0177     }
0178
0179     QStringEncoder codec(charset.constData());
0180     QByteArray latin;
0181     if (charset == "us-ascii") {
0182         latin = str.toLatin1();
0183     } else if (codec.isValid()) {
0184         latin = codec.encode(str);
0185     } else {
0186         latin = str.toLocal8Bit();
0187     }
0188
0189     char *l;
0190     for (l = latin.data(); *l; ++l) {
0191         if (((*l & 0xE0) == 0) || (*l & 0x80)) {
0192             // *l is control character or 8-bit char
0193             break;
0194         }
0195     }
0196     if (!*l) {
0197         return latin;
0198     }
0199
0200     QByteArray result = charset + "''";
0201     for (l = latin.data(); *l; ++l) {
0202         bool needsQuoting = (*l & 0x80) || (*l == '%');
0203         if (!needsQuoting) {
0204             const QByteArray especials = "()<>@,;:\"/[]?.= \033";
0205             int len = especials.length();
0206             for (int i = 0; i < len; i++) {
0207                 if (*l == especials[i]) {
0208                     needsQuoting = true;
0209                     break;
0210                 }
0211             }
0212         }
0213         if (needsQuoting) {
0214             result += '%';
0215             unsigned char hexcode;
0216             hexcode = ((*l & 0xF0) >> 4) + 48;
0217             if (hexcode >= 58) {
0218                 hexcode += 7;
0219             }
0220             result += hexcode;
0221             hexcode = (*l & 0x0F) + 48;
0222             if (hexcode >= 58) {
0223                 hexcode += 7;
0224             }
0225             result += hexcode;
0226         } else {
0227             result += *l;
0228         }
0229     }
0230     return result;
0231 }
0232
0233 }