File indexing completed on 2022-11-23 12:03:45

0001 /*  -*- c++ -*-
0002     kmime_header_parsing.cpp
0003 
0004     KMime, the KDE Internet mail/usenet news message library.
0005     SPDX-FileCopyrightText: 2001-2002 Marc Mutz <mutz@kde.org>
0006 
0007     SPDX-License-Identifier: LGPL-2.0-or-later
0008 */
0009 
0010 #include "kmime_header_parsing.h"
0011 
0012 #include "kmime_headerfactory_p.h"
0013 #include "kmime_headers.h"
0014 #include "kmime_util.h"
0015 #include "kmime_util_p.h"
0016 #include "kmime_codecs.h"
0017 #include "kmime_dateformatter.h"
0018 #include "kmime_debug.h"
0019 #include "kmime_warning.h"
0020 
0021 #include <KCharsets>
0022 
0023 #include <KCodecs>
0024 
0025 #include <QTextCodec>
0026 #include <QMap>
0027 
0028 #include <cassert>
0029 #include <cctype> // for isdigit
0030 
0031 using namespace KMime;
0032 using namespace KMime::Types;
0033 
0034 namespace KMime
0035 {
0036 
0037     namespace Types
0038     {
0039         // Optimization to avoid allocating QStrings when the value isn't encoded
0040         struct KMIME_EXPORT QStringOrQPair {
0041             QStringOrQPair() : qstring(), qpair(nullptr, 0) {}
0042             QString qstring;
0043             QPair<const char *, int> qpair;
0044         };
0045     } // namespace Types
0046 
0047 namespace HeaderParsing
0048 {
0049 
0050 // parse the encoded-word (scursor points to after the initial '=')
0051 bool parseEncodedWord(const char *&scursor, const char *const send,
0052                       QString &result, QByteArray &language,
0053                       QByteArray &usedCS, const QByteArray &defaultCS,
0054                       bool forceCS)
0055 {
0056     // make sure the caller already did a bit of the work.
0057     assert(*(scursor - 1) == '=');
0058 
0059     //
0060     // STEP 1:
0061     // scan for the charset/language portion of the encoded-word
0062     //
0063 
0064     char ch = *scursor++;
0065 
0066     if (ch != '?') {
0067         // qCDebug(KMIME_LOG) << "first";
0068         //KMIME_WARN_PREMATURE_END_OF( EncodedWord );
0069         return false;
0070     }
0071 
0072     // remember start of charset (ie. just after the initial "=?") and
0073     // language (just after the first '*') fields:
0074     const char *charsetStart = scursor;
0075     const char *languageStart = nullptr;
0076 
0077     // find delimiting '?' (and the '*' separating charset and language
0078     // tags, if any):
0079     for (; scursor != send ; scursor++) {
0080         if (*scursor == '?') {
0081             break;
0082         } else if (*scursor == '*' && languageStart == nullptr) {
0083             languageStart = scursor + 1;
0084         }
0085     }
0086 
0087     // not found? can't be an encoded-word!
0088     if (scursor == send || *scursor != '?') {
0089         // qCDebug(KMIME_LOG) << "second";
0090         KMIME_WARN_PREMATURE_END_OF(EncodedWord);
0091         return false;
0092     }
0093 
0094     // extract the language information, if any (if languageStart is 0,
0095     // language will be null, too):
0096     QByteArray maybeLanguage(languageStart, scursor - languageStart);
0097     // extract charset information (keep in mind: the size given to the
0098     // ctor is one off due to the \0 terminator):
0099     QByteArray maybeCharset(charsetStart,
0100                             (languageStart ? languageStart - 1 : scursor) - charsetStart);
0101 
0102     //
0103     // STEP 2:
0104     // scan for the encoding portion of the encoded-word
0105     //
0106 
0107     // remember start of encoding (just _after_ the second '?'):
0108     scursor++;
0109     const char *encodingStart = scursor;
0110 
0111     // find next '?' (ending the encoding tag):
0112     for (; scursor != send ; scursor++) {
0113         if (*scursor == '?') {
0114             break;
0115         }
0116     }
0117 
0118     // not found? Can't be an encoded-word!
0119     if (scursor == send || *scursor != '?') {
0120         // qCDebug(KMIME_LOG) << "third";
0121         KMIME_WARN_PREMATURE_END_OF(EncodedWord);
0122         return false;
0123     }
0124 
0125     // extract the encoding information:
0126     QByteArray maybeEncoding(encodingStart, scursor - encodingStart);
0127 
0128     // qCDebug(KMIME_LOG) << "parseEncodedWord: found charset == \"" << maybeCharset
0129     //         << "\"; language == \"" << maybeLanguage
0130     //         << "\"; encoding == \"" << maybeEncoding << "\"";
0131 
0132     //
0133     // STEP 3:
0134     // scan for encoded-text portion of encoded-word
0135     //
0136 
0137     // remember start of encoded-text (just after the third '?'):
0138     scursor++;
0139     const char *encodedTextStart = scursor;
0140 
0141     // find the '?=' sequence (ending the encoded-text):
0142     for (; scursor != send ; scursor++) {
0143         if (*scursor == '?') {
0144             if (scursor + 1 != send) {
0145                 if (*(scursor + 1) != '=') {     // We expect a '=' after the '?', but we got something else; ignore
0146                     KMIME_WARN << "Stray '?' in q-encoded word, ignoring this.";
0147                     continue;
0148                 } else { // yep, found a '?=' sequence
0149                     scursor += 2;
0150                     break;
0151                 }
0152             } else { // The '?' is the last char, but we need a '=' after it!
0153                 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
0154                 return false;
0155             }
0156         }
0157     }
0158 
0159     if (*(scursor - 2) != '?' || *(scursor - 1) != '=' ||
0160             scursor < encodedTextStart + 2) {
0161         KMIME_WARN_PREMATURE_END_OF(EncodedWord);
0162         return false;
0163     }
0164 
0165     // set end sentinel for encoded-text:
0166     const char *const encodedTextEnd = scursor - 2;
0167 
0168     //
0169     // STEP 4:
0170     // setup decoders for the transfer encoding and the charset
0171     //
0172 
0173     // try if there's a codec for the encoding found:
0174     KCodecs::Codec *codec = KCodecs::Codec::codecForName(maybeEncoding);
0175     if (!codec) {
0176         KMIME_WARN_UNKNOWN(Encoding, maybeEncoding);
0177         return false;
0178     }
0179 
0180     // get an instance of a corresponding decoder:
0181     KCodecs::Decoder *dec = codec->makeDecoder();
0182     assert(dec);
0183 
0184     // try if there's a (text)codec for the charset found:
0185     bool matchOK = false;
0186     QTextCodec *textCodec = nullptr;
0187     if (forceCS || maybeCharset.isEmpty()) {
0188         textCodec = KCharsets::charsets()->codecForName(QLatin1String(defaultCS), matchOK);
0189         usedCS = cachedCharset(defaultCS);
0190     } else {
0191         textCodec = KCharsets::charsets()->codecForName(QLatin1String(maybeCharset), matchOK);
0192         if (!matchOK) {    //no suitable codec found => use default charset
0193             textCodec = KCharsets::charsets()->codecForName(QLatin1String(defaultCS), matchOK);
0194             usedCS = cachedCharset(defaultCS);
0195         } else {
0196             usedCS = cachedCharset(maybeCharset);
0197         }
0198     }
0199 
0200     if (!matchOK || !textCodec) {
0201         KMIME_WARN_UNKNOWN(Charset, maybeCharset);
0202         delete dec;
0203         return false;
0204     };
0205 
0206     // qCDebug(KMIME_LOG) << "mimeName(): \"" << textCodec->name() << "\"";
0207 
0208     // allocate a temporary buffer to store the 8bit text:
0209     int encodedTextLength = encodedTextEnd - encodedTextStart;
0210     QByteArray buffer;
0211     buffer.resize(codec->maxDecodedSizeFor(encodedTextLength));
0212     char *bbegin = buffer.data();
0213     char *bend = bbegin + buffer.length();
0214 
0215     //
0216     // STEP 5:
0217     // do the actual decoding
0218     //
0219 
0220     if (!dec->decode(encodedTextStart, encodedTextEnd, bbegin, bend)) {
0221         KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor("
0222                    << encodedTextLength << ")\nresult may be truncated";
0223     }
0224 
0225     result = textCodec->toUnicode(buffer.data(), bbegin - buffer.data());
0226 
0227     // qCDebug(KMIME_LOG) << "result now: \"" << result << "\"";
0228     // cleanup:
0229     delete dec;
0230     language = maybeLanguage;
0231 
0232     return true;
0233 }
0234 
// Advances scursor past any run of spaces, tabs, CRs and LFs. Stops at the
// first other character, or at send if only whitespace is left.
static inline void eatWhiteSpace(const char *&scursor, const char *const send)
{
    for (; scursor != send; ++scursor) {
        const char c = *scursor;
        const bool isBlank = (c == ' ') || (c == '\n') || (c == '\t') || (c == '\r');
        if (!isBlank) {
            return;
        }
    }
}
0243 
0244 bool parseAtom(const char*&scursor, const char *const send,
0245                QByteArray &result, bool allow8Bit)
0246 {
0247     QPair<const char *, int> maybeResult;
0248 
0249     if (parseAtom(scursor, send, maybeResult, allow8Bit)) {
0250         result = QByteArray(maybeResult.first, maybeResult.second);
0251         return true;
0252     }
0253 
0254     return false;
0255 }
0256 
0257 bool parseAtom(const char*&scursor, const char *const send,
0258                QPair<const char *, int> &result, bool allow8Bit)
0259 {
0260     bool success = false;
0261     const char *start = scursor;
0262 
0263     while (scursor != send) {
0264         signed char ch = *scursor++;
0265         if (ch > 0 && isAText(ch)) {
0266             // AText: OK
0267             success = true;
0268         } else if (allow8Bit && ch < 0) {
0269             // 8bit char: not OK, but be tolerant.
0270             KMIME_WARN_8BIT(ch);
0271             success = true;
0272         } else {
0273             // CTL or special - marking the end of the atom:
0274             // re-set sursor to point to the offending
0275             // char and return:
0276             scursor--;
0277             break;
0278         }
0279     }
0280     result.first = start;
0281     result.second = scursor - start;
0282     return success;
0283 }
0284 
0285 bool parseToken(const char*&scursor, const char *const send,
0286                 QByteArray &result, ParseTokenFlags flags)
0287 {
0288     QPair<const char *, int> maybeResult;
0289 
0290     if (parseToken(scursor, send, maybeResult, flags)) {
0291         result = QByteArray(maybeResult.first, maybeResult.second);
0292         return true;
0293     }
0294 
0295     return false;
0296 }
0297 
0298 bool parseToken(const char*&scursor, const char *const send,
0299                 QPair<const char *, int> &result, ParseTokenFlags flags)
0300 {
0301     bool success = false;
0302     const char *start = scursor;
0303 
0304     while (scursor != send) {
0305         signed char ch = *scursor++;
0306         if (ch > 0 && isTText(ch)) {
0307             // TText: OK
0308             success = true;
0309         } else if ((flags & ParseTokenAllow8Bit) && ch < 0) {
0310             // 8bit char: not OK, but be tolerant.
0311             KMIME_WARN_8BIT(ch);
0312             success = true;
0313         } else if ((flags & ParseTokenRelaxedTText) && ch == '/') {
0314             success = true;
0315         } else {
0316             // CTL or tspecial - marking the end of the atom:
0317             // re-set sursor to point to the offending
0318             // char and return:
0319             scursor--;
0320             break;
0321         }
0322     }
0323     result.first = start;
0324     result.second = scursor - start;
0325     return success;
0326 }
0327 
// Reads the next input char into "ch" and advances scursor. If the input is
// already exhausted (scursor == send) it warns about a premature end and makes
// the enclosing function return false instead.
// Expects "scursor", "send" and an assignable "ch" to be in scope; note the
// hidden return statement.
#define READ_ch_OR_FAIL if ( scursor == send ) {        \
        KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \
        return false;                                       \
    } else {                                              \
        ch = *scursor++;                                    \
    }
0334 
0335 // known issues:
0336 //
0337 // - doesn't handle quoted CRLF
0338 
0339 bool parseGenericQuotedString(const char *&scursor, const char *const send,
0340                               QString &result, bool isCRLF,
0341                               const char openChar, const char closeChar)
0342 {
0343     // We are in a quoted-string or domain-literal or comment and the
0344     // cursor points to the first char after the openChar.
0345     // We will apply unfolding and quoted-pair removal.
0346     // We return when we either encounter the end or unescaped openChar
0347     // or closeChar.
0348     assert(*(scursor - 1) == openChar || *(scursor - 1) == closeChar);
0349 
0350     while (scursor != send) {
0351         char ch = *scursor++;
0352 
0353         if (ch == closeChar || ch == openChar) {
0354             // end of quoted-string or another opening char:
0355             // let caller decide what to do.
0356             return true;
0357         }
0358 
0359         switch (ch) {
0360         case '\\':      // quoted-pair
0361             // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
0362             READ_ch_OR_FAIL;
0363             KMIME_WARN_IF_8BIT(ch);
0364             result += QLatin1Char(ch);
0365             break;
0366         case '\r':
0367             // ###
0368             // The case of lonely '\r' is easy to solve, as they're
0369             // not part of Unix Line-ending conventions.
0370             // But I see a problem if we are given Unix-native
0371             // line-ending-mails, where we cannot determine anymore
0372             // whether a given '\n' was part of a CRLF or was occurring
0373             // on it's own.
0374             READ_ch_OR_FAIL;
0375             if (ch != '\n') {
0376                 // CR on it's own...
0377                 KMIME_WARN_LONE(CR);
0378                 result += QLatin1Char('\r');
0379                 scursor--; // points to after the '\r' again
0380             } else {
0381                 // CRLF encountered.
0382                 // lookahead: check for folding
0383                 READ_ch_OR_FAIL;
0384                 if (ch == ' ' || ch == '\t') {
0385                     // correct folding;
0386                     // position cursor behind the CRLF WSP (unfolding)
0387                     // and add the WSP to the result
0388                     result += QLatin1Char(ch);
0389                 } else {
0390                     // this is the "shouldn't happen"-case. There is a CRLF
0391                     // inside a quoted-string without it being part of FWS.
0392                     // We take it verbatim.
0393                     KMIME_WARN_NON_FOLDING(CRLF);
0394                     result += QLatin1String("\r\n");
0395                     // the cursor is decremented again, so's we need not
0396                     // duplicate the whole switch here. "ch" could've been
0397                     // everything (incl. openChar or closeChar).
0398                     scursor--;
0399                 }
0400             }
0401             break;
0402         case '\n':
0403             // Note: CRLF has been handled above already!
0404             // ### LF needs special treatment, depending on whether isCRLF
0405             // is true (we can be sure a lonely '\n' was meant this way) or
0406             // false ('\n' alone could have meant LF or CRLF in the original
0407             // message. This parser assumes CRLF iff the LF is followed by
0408             // either WSP (folding) or NULL (premature end of quoted-string;
0409             // Should be fixed, since NULL is allowed as per rfc822).
0410             READ_ch_OR_FAIL;
0411             if (!isCRLF && (ch == ' ' || ch == '\t')) {
0412                 // folding
0413                 // correct folding
0414                 result += QLatin1Char(ch);
0415             } else {
0416                 // non-folding
0417                 KMIME_WARN_LONE(LF);
0418                 result += QLatin1Char('\n');
0419                 // pos is decremented, so's we need not duplicate the whole
0420                 // switch here. ch could've been everything (incl. <">, "\").
0421                 scursor--;
0422             }
0423             break;
0424         case '=': {
0425             // ### Work around broken clients that send encoded words in quoted-strings
0426             //     For example, older KMail versions.
0427             if (scursor == send) {
0428                 break;
0429             }
0430 
0431             const char *oldscursor = scursor;
0432             QString tmp;
0433             QByteArray lang;
0434             QByteArray charset;
0435             if (*scursor++ == '?') {
0436                 --scursor;
0437                 if (parseEncodedWord(scursor, send, tmp, lang, charset)) {
0438                     result += tmp;
0439                     //qDebug() << " tmp " << tmp;
0440                     if (scursor == send) {
0441                         break;
0442                     } else if (*scursor++ == ' ') { //Workaround Bug 362650 thunderbird add space for each new line
0443                         if (scursor == send) {
0444                             --scursor;
0445                             break;
0446                         } else if (*scursor++ == '=') {
0447                             if (scursor == send) {
0448                                 --scursor;
0449                                 --scursor;
0450                                 break;
0451                             } else if (*scursor++ == '?') {
0452                                 --scursor;
0453                                 --scursor;
0454                                 break;
0455                             }
0456                         } else {
0457                             --scursor;
0458                             --scursor;
0459                         }
0460                     } else {
0461                         --scursor;
0462                     }
0463 
0464                     break;
0465                 } else {
0466                     scursor = oldscursor;
0467                 }
0468             } else {
0469                 scursor = oldscursor;
0470             }
0471             // fall through
0472             Q_FALLTHROUGH();
0473         }
0474         default:
0475             KMIME_WARN_IF_8BIT(ch);
0476             result += QLatin1Char(ch);
0477         }
0478     }
0479 
0480     return false;
0481 }
0482 
0483 // known issues:
0484 //
0485 // - doesn't handle encoded-word inside comments.
0486 
0487 bool parseComment(const char *&scursor, const char *const send,
0488                   QString &result, bool isCRLF, bool reallySave)
0489 {
0490     int commentNestingDepth = 1;
0491     const char *afterLastClosingParenPos = nullptr;
0492     QString maybeCmnt;
0493     const char *oldscursor = scursor;
0494 
0495     assert(*(scursor - 1) == '(');
0496 
0497     while (commentNestingDepth) {
0498         QString cmntPart;
0499         if (parseGenericQuotedString(scursor, send, cmntPart, isCRLF, '(', ')')) {
0500             assert(*(scursor - 1) == ')' || *(scursor - 1) == '(');
0501             // see the kdoc for above function for the possible conditions
0502             // we have to check:
0503             switch (*(scursor - 1)) {
0504             case ')':
0505                 if (reallySave) {
0506                     // add the chunk that's now surely inside the comment.
0507                     result += maybeCmnt;
0508                     result += cmntPart;
0509                     if (commentNestingDepth > 1) {
0510                         // don't add the outermost ')'...
0511                         result += QLatin1Char(')');
0512                     }
0513                     maybeCmnt.clear();
0514                 }
0515                 afterLastClosingParenPos = scursor;
0516                 --commentNestingDepth;
0517                 break;
0518             case '(':
0519                 if (reallySave) {
0520                     // don't add to "result" yet, because we might find that we
0521                     // are already outside the (broken) comment...
0522                     maybeCmnt += cmntPart;
0523                     maybeCmnt += QLatin1Char('(');
0524                 }
0525                 ++commentNestingDepth;
0526                 break;
0527             default: assert(0);
0528             } // switch
0529         } else {
0530             // !parseGenericQuotedString, ie. premature end
0531             if (afterLastClosingParenPos) {
0532                 scursor = afterLastClosingParenPos;
0533             } else {
0534                 scursor = oldscursor;
0535             }
0536             return false;
0537         }
0538     } // while
0539 
0540     return true;
0541 }
0542 
0543 // known issues: none.
0544 
// Parses a phrase: a sequence of atoms, quoted-strings and encoded-words,
// with comments skipped and '.' tolerated once something has been found.
//
// The words are appended to result separated by single spaces; no space is
// inserted between two directly adjacent encoded-words (rfc2047, 6.2).
// Returns true if at least one word was parsed. When a later comment or atom
// fails to parse, scursor is reset to just after the last successfully parsed
// element and true is returned. Returns false if nothing could be parsed.
bool parsePhrase(const char *&scursor, const char *const send,
                 QString &result, bool isCRLF)
{
    // Kind of content seen so far; becomes Phrase as soon as a second word
    // has been appended.
    enum {
        None, Phrase, Atom, EncodedWord, QuotedString
    } found = None;

    QString tmp;
    QByteArray lang;
    QByteArray charset;
    QPair<const char *, int> tmpAtom;
    // points to just after the last element that was parsed successfully
    const char *successfullyParsed = nullptr;
    // only used by the encoded-word branch
    const char *oldscursor;
    // used to suppress whitespace between adjacent encoded-words
    // (rfc2047, 6.2):
    bool lastWasEncodedWord = false;

    while (scursor != send) {
        char ch = *scursor++;
        switch (ch) {
        case '.': // broken, but allow for interop's sake
            if (found == None) {
                // a phrase must not start with a dot: un-read it and fail
                --scursor;
                return false;
            } else {
                if (scursor != send && (*scursor == ' ' || *scursor == '\t')) {
                    result += QLatin1String(". ");
                } else {
                    result += QLatin1Char('.');
                }
                successfullyParsed = scursor;
            }
            break;
        case '"': // quoted-string
            tmp.clear();
            if (parseGenericQuotedString(scursor, send, tmp, isCRLF, '"', '"')) {
                successfullyParsed = scursor;
                assert(*(scursor - 1) == '"');
                switch (found) {
                case None:
                    found = QuotedString;
                    break;
                case Phrase:
                case Atom:
                case EncodedWord:
                case QuotedString:
                    found = Phrase;
                    result += QLatin1Char(' ');   // rfc822, 3.4.4
                    break;
                default:
                    assert(0);
                }
                lastWasEncodedWord = false;
                result += tmp;
            } else {
                // premature end of quoted string.
                // What to do? Return leading '"' as special? Return as quoted-string?
                // We do the latter if we already found something, else signal failure.
                if (found == None) {
                    return false;
                } else {
                    result += QLatin1Char(' ');   // rfc822, 3.4.4
                    result += tmp;
                    return true;
                }
            }
            break;
        case '(': // comment
            // parse it, but ignore content:
            tmp.clear();
            if (parseComment(scursor, send, tmp, isCRLF,
                             false /*don't bother with the content*/)) {
                successfullyParsed = scursor;
                lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
            } else {
                if (found == None) {
                    return false;
                } else {
                    // keep what we have and stop before the broken comment
                    scursor = successfullyParsed;
                    return true;
                }
            }
            break;
        case '=': // encoded-word
            tmp.clear();
            oldscursor = scursor;
            lang.clear();
            charset.clear();
            if (parseEncodedWord(scursor, send, tmp, lang, charset)) {
                successfullyParsed = scursor;
                switch (found) {
                case None:
                    found = EncodedWord;
                    break;
                case Phrase:
                case EncodedWord:
                case Atom:
                case QuotedString:
                    if (!lastWasEncodedWord) {
                        result += QLatin1Char(' ');   // rfc822, 3.4.4
                    }
                    found = Phrase;
                    break;
                default: assert(0);
                }
                lastWasEncodedWord = true;
                result += tmp;
                break;
            } else {
                // parse as atom:
                // (rewind to just after the '='; the atom branch below steps
                // back onto the '=' itself)
                scursor = oldscursor;
            }
            Q_FALLTHROUGH();
            // fall through...

        default: //atom
            // un-read ch so that parseAtom sees it as the first char
            scursor--;
            if (parseAtom(scursor, send, tmpAtom, true /* allow 8bit */)) {
                successfullyParsed = scursor;
                switch (found) {
                case None:
                    found = Atom;
                    break;
                case Phrase:
                case Atom:
                case EncodedWord:
                case QuotedString:
                    found = Phrase;
                    result += QLatin1Char(' ');   // rfc822, 3.4.4
                    break;
                default:
                    assert(0);
                }
                lastWasEncodedWord = false;
                result += QLatin1String(tmpAtom.first, tmpAtom.second);
            } else {
                if (found == None) {
                    return false;
                } else {
                    // keep what we have and stop before the offending char
                    scursor = successfullyParsed;
                    return true;
                }
            }
        }
        // skip whitespace between words; successfullyParsed is not moved by this
        eatWhiteSpace(scursor, send);
    }

    return found != None;
}
0695 
0696 bool parseDotAtom(const char *&scursor, const char *const send,
0697                   QByteArray &result, bool isCRLF)
0698 {
0699     eatCFWS(scursor, send, isCRLF);
0700 
0701     // always points to just after the last atom parsed:
0702     const char *successfullyParsed;
0703 
0704     QByteArray tmp;
0705     if (!parseAtom(scursor, send, tmp, false /* no 8bit */)) {
0706         return false;
0707     }
0708     result += tmp;
0709     successfullyParsed = scursor;
0710 
0711     while (scursor != send) {
0712 
0713         // end of header or no '.' -> return
0714         if (scursor == send || *scursor != '.') {
0715             return true;
0716         }
0717         scursor++; // eat '.'
0718 
0719         if (scursor == send || !isAText(*scursor)) {
0720             // end of header or no AText, but this time following a '.'!:
0721             // reset cursor to just after last successfully parsed char and
0722             // return:
0723             scursor = successfullyParsed;
0724             return true;
0725         }
0726 
0727         // try to parse the next atom:
0728         QByteArray maybeAtom;
0729         if (!parseAtom(scursor, send, maybeAtom, false /*no 8bit*/)) {
0730             scursor = successfullyParsed;
0731             return true;
0732         }
0733 
0734         result += '.';
0735         result += maybeAtom;
0736         successfullyParsed = scursor;
0737     }
0738 
0739     scursor = successfullyParsed;
0740     return true;
0741 }
0742 
0743 void eatCFWS(const char *&scursor, const char *const send, bool isCRLF)
0744 {
0745     QString dummy;
0746 
0747     while (scursor != send) {
0748         const char *oldscursor = scursor;
0749 
0750         char ch = *scursor++;
0751 
0752         switch (ch) {
0753         case ' ':
0754         case '\t': // whitespace
0755         case '\r':
0756         case '\n': // folding
0757             continue;
0758 
0759         case '(': // comment
0760             if (parseComment(scursor, send, dummy, isCRLF, false /*don't save*/)) {
0761                 continue;
0762             }
0763             scursor = oldscursor;
0764             return;
0765 
0766         default:
0767             scursor = oldscursor;
0768             return;
0769         }
0770     }
0771 }
0772 
0773 bool parseDomain(const char *&scursor, const char *const send,
0774                  QString &result, bool isCRLF)
0775 {
0776     eatCFWS(scursor, send, isCRLF);
0777     if (scursor == send) {
0778         return false;
0779     }
0780 
0781     // domain := dot-atom / domain-literal / atom *("." atom)
0782     //
0783     // equivalent to:
0784     // domain = dot-atom / domain-literal,
0785     // since parseDotAtom does allow CFWS between atoms and dots
0786 
0787     if (*scursor == '[') {
0788         // domain-literal:
0789         QString maybeDomainLiteral;
0790         // eat '[':
0791         scursor++;
0792         while (parseGenericQuotedString(scursor, send, maybeDomainLiteral,
0793                                         isCRLF, '[', ']')) {
0794             if (scursor == send) {
0795                 // end of header: check for closing ']':
0796                 if (*(scursor - 1) == ']') {
0797                     // OK, last char was ']':
0798                     result = maybeDomainLiteral;
0799                     return true;
0800                 } else {
0801                     // not OK, domain-literal wasn't closed:
0802                     return false;
0803                 }
0804             }
0805             // we hit openChar in parseGenericQuotedString.
0806             // include it in maybeDomainLiteral and keep on parsing:
0807             if (*(scursor - 1) == '[') {
0808                 maybeDomainLiteral += QLatin1Char('[');
0809                 continue;
0810             }
0811             // OK, real end of domain-literal:
0812             result = maybeDomainLiteral;
0813             return true;
0814         }
0815     } else {
0816         // dot-atom:
0817         QByteArray maybeDotAtom;
0818         if (parseDotAtom(scursor, send, maybeDotAtom, isCRLF)) {
0819             // Domain may end with '.', if so preserve it'
0820             if (scursor != send && *scursor == '.') {
0821                 maybeDotAtom += '.';
0822                 scursor++;
0823             }
0824             result = QString::fromLatin1(maybeDotAtom);
0825             return true;
0826         }
0827     }
0828     return false;
0829 }
0830 
0831 bool parseObsRoute(const char *&scursor, const char *const send,
0832                    QStringList &result, bool isCRLF, bool save)
0833 {
0834     while (scursor != send) {
0835         eatCFWS(scursor, send, isCRLF);
0836         if (scursor == send) {
0837             return false;
0838         }
0839 
0840         // empty entry:
0841         if (*scursor == ',') {
0842             scursor++;
0843             if (save) {
0844                 result.append(QString());
0845             }
0846             continue;
0847         }
0848 
0849         // empty entry ending the list:
0850         if (*scursor == ':') {
0851             scursor++;
0852             if (save) {
0853                 result.append(QString());
0854             }
0855             return true;
0856         }
0857 
0858         // each non-empty entry must begin with '@':
0859         if (*scursor != '@') {
0860             return false;
0861         } else {
0862             scursor++;
0863         }
0864 
0865         QString maybeDomain;
0866         if (!parseDomain(scursor, send, maybeDomain, isCRLF)) {
0867             return false;
0868         }
0869         if (save) {
0870             result.append(maybeDomain);
0871         }
0872 
0873         // eat the following (optional) comma:
0874         eatCFWS(scursor, send, isCRLF);
0875         if (scursor == send) {
0876             return false;
0877         }
0878         if (*scursor == ':') {
0879             scursor++;
0880             return true;
0881         }
0882         if (*scursor == ',') {
0883             scursor++;
0884         }
0885     }
0886 
0887     return false;
0888 }
0889 
0890 bool parseAddrSpec(const char *&scursor, const char *const send,
0891                    AddrSpec &result, bool isCRLF)
0892 {
0893     //
0894     // STEP 1:
0895     // local-part := dot-atom / quoted-string / word *("." word)
0896     //
0897     // this is equivalent to:
0898     // local-part := word *("." word)
0899 
0900     QString maybeLocalPart;
0901     QString tmp;
0902     QPair<const char *, int> tmpAtom;
0903 
0904     while (scursor != send) {
0905         // first, eat any whitespace
0906         eatCFWS(scursor, send, isCRLF);
0907 
0908         char ch = *scursor++;
0909         switch (ch) {
0910         case '.': // dot
0911             maybeLocalPart += QLatin1Char('.');
0912             break;
0913 
0914         case '@':
0915             goto SAW_AT_SIGN;
0916             break;
0917 
0918         case '"': // quoted-string
0919             tmp.clear();
0920             if (parseGenericQuotedString(scursor, send, tmp, isCRLF, '"', '"')) {
0921                 maybeLocalPart += tmp;
0922             } else {
0923                 return false;
0924             }
0925             break;
0926 
0927         default: // atom
0928             scursor--; // re-set scursor to point to ch again
0929             if (parseAtom(scursor, send, tmpAtom, false /* no 8bit */)) {
0930                 maybeLocalPart += QLatin1String(tmpAtom.first, tmpAtom.second);
0931             } else {
0932                 return false; // parseAtom can only fail if the first char is non-atext.
0933             }
0934             break;
0935         }
0936     }
0937 
0938     return false;
0939 
0940     //
0941     // STEP 2:
0942     // domain
0943     //
0944 
0945 SAW_AT_SIGN:
0946 
0947     assert(*(scursor - 1) == '@');
0948 
0949     QString maybeDomain;
0950     if (!parseDomain(scursor, send, maybeDomain, isCRLF)) {
0951         return false;
0952     }
0953 
0954     result.localPart = maybeLocalPart;
0955     result.domain = maybeDomain;
0956 
0957     return true;
0958 }
0959 
0960 bool parseAngleAddr(const char *&scursor, const char *const send,
0961                     AddrSpec &result, bool isCRLF)
0962 {
0963     // first, we need an opening angle bracket:
0964     eatCFWS(scursor, send, isCRLF);
0965     if (scursor == send || *scursor != '<') {
0966         return false;
0967     }
0968     scursor++; // eat '<'
0969 
0970     eatCFWS(scursor, send, isCRLF);
0971     if (scursor == send) {
0972         return false;
0973     }
0974 
0975     if (*scursor == '@' || *scursor == ',') {
0976         // obs-route: parse, but ignore:
0977         KMIME_WARN << "obsolete source route found! ignoring.";
0978         QStringList dummy;
0979         if (!parseObsRoute(scursor, send, dummy,
0980                            isCRLF, false /* don't save */)) {
0981             return false;
0982         }
0983         // angle-addr isn't complete until after the '>':
0984         if (scursor == send) {
0985             return false;
0986         }
0987     }
0988 
0989     // parse addr-spec:
0990     AddrSpec maybeAddrSpec;
0991     if (!parseAddrSpec(scursor, send, maybeAddrSpec, isCRLF)) {
0992         return false;
0993     }
0994 
0995     eatCFWS(scursor, send, isCRLF);
0996     if (scursor == send || *scursor != '>') {
0997         return false;
0998     }
0999     scursor++;
1000 
1001     result = maybeAddrSpec;
1002     return true;
1003 
1004 }
1005 
1006 static QString stripQuotes(const QString &input)
1007 {
1008     const QLatin1Char quotes('"');
1009     if (input.startsWith(quotes) && input.endsWith(quotes)) {
1010         QString stripped(input.mid(1, input.size() - 2));
1011         return stripped;
1012     } else {
1013         return input;
1014     }
1015 }
1016 
1017 bool parseMailbox(const char *&scursor, const char *const send,
1018                   Mailbox &result, bool isCRLF)
1019 {
1020     eatCFWS(scursor, send, isCRLF);
1021     if (scursor == send) {
1022         return false;
1023     }
1024 
1025     AddrSpec maybeAddrSpec;
1026     QString maybeDisplayName;
1027 
1028     // first, try if it's a vanilla addr-spec:
1029     const char *oldscursor = scursor;
1030     if (parseAddrSpec(scursor, send, maybeAddrSpec, isCRLF)) {
1031         result.setAddress(maybeAddrSpec);
1032         // check for the obsolete form of display-name (as comment):
1033         eatWhiteSpace(scursor, send);
1034         if (scursor != send && *scursor == '(') {
1035             scursor++;
1036             if (!parseComment(scursor, send, maybeDisplayName, isCRLF, true /*keep*/)) {
1037                 return false;
1038             }
1039         }
1040         result.setName(stripQuotes(maybeDisplayName));
1041         return true;
1042     }
1043     scursor = oldscursor;
1044 
1045     // second, see if there's a display-name:
1046     if (!parsePhrase(scursor, send, maybeDisplayName, isCRLF)) {
1047         // failed: reset cursor, note absent display-name
1048         maybeDisplayName.clear();
1049         scursor = oldscursor;
1050     } else {
1051         // succeeded: eat CFWS
1052         eatCFWS(scursor, send, isCRLF);
1053         if (scursor == send) {
1054             return false;
1055         }
1056     }
1057 
1058     // third, parse the angle-addr:
1059     if (!parseAngleAddr(scursor, send, maybeAddrSpec, isCRLF)) {
1060         return false;
1061     }
1062 
1063     if (maybeDisplayName.isNull()) {
1064         // check for the obsolete form of display-name (as comment):
1065         eatWhiteSpace(scursor, send);
1066         if (scursor != send && *scursor == '(') {
1067             scursor++;
1068             if (!parseComment(scursor, send, maybeDisplayName, isCRLF, true /*keep*/)) {
1069                 return false;
1070             }
1071         }
1072     }
1073 
1074     result.setName(stripQuotes(maybeDisplayName));
1075     result.setAddress(maybeAddrSpec);
1076     return true;
1077 }
1078 
1079 bool parseGroup(const char *&scursor, const char *const send,
1080                 Address &result, bool isCRLF)
1081 {
1082     // group         := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
1083     //
1084     // equivalent to:
1085     // group   := display-name ":" [ obs-mbox-list ] ";"
1086 
1087     eatCFWS(scursor, send, isCRLF);
1088     if (scursor == send) {
1089         return false;
1090     }
1091 
1092     // get display-name:
1093     QString maybeDisplayName;
1094     if (!parsePhrase(scursor, send, maybeDisplayName, isCRLF)) {
1095         return false;
1096     }
1097 
1098     // get ":":
1099     eatCFWS(scursor, send, isCRLF);
1100     if (scursor == send || *scursor != ':') {
1101         return false;
1102     }
1103 
1104     // KDE5 TODO: Don't expose displayName as public, but rather add setter for it that
1105     //            automatically calls removeBidiControlChars
1106     result.displayName = removeBidiControlChars(maybeDisplayName);
1107 
1108     // get obs-mbox-list (may contain empty entries):
1109     scursor++;
1110     while (scursor != send) {
1111         eatCFWS(scursor, send, isCRLF);
1112         if (scursor == send) {
1113             return false;
1114         }
1115 
1116         // empty entry:
1117         if (*scursor == ',') {
1118             scursor++;
1119             continue;
1120         }
1121 
1122         // empty entry ending the list:
1123         if (*scursor == ';') {
1124             scursor++;
1125             return true;
1126         }
1127 
1128         Mailbox maybeMailbox;
1129         if (!parseMailbox(scursor, send, maybeMailbox, isCRLF)) {
1130             return false;
1131         }
1132         result.mailboxList.append(maybeMailbox);
1133 
1134         eatCFWS(scursor, send, isCRLF);
1135         // premature end:
1136         if (scursor == send) {
1137             return false;
1138         }
1139         // regular end of the list:
1140         if (*scursor == ';') {
1141             scursor++;
1142             return true;
1143         }
1144         // eat regular list entry separator:
1145         if (*scursor == ',') {
1146             scursor++;
1147         }
1148     }
1149     return false;
1150 }
1151 
1152 bool parseAddress(const char *&scursor, const char *const send,
1153                   Address &result, bool isCRLF)
1154 {
1155     // address       := mailbox / group
1156 
1157     eatCFWS(scursor, send, isCRLF);
1158     if (scursor == send) {
1159         return false;
1160     }
1161 
1162     // first try if it's a single mailbox:
1163     Mailbox maybeMailbox;
1164     const char *oldscursor = scursor;
1165     if (parseMailbox(scursor, send, maybeMailbox, isCRLF)) {
1166         // yes, it is:
1167         result.displayName.clear();
1168         result.mailboxList.append(maybeMailbox);
1169         return true;
1170     }
1171     scursor = oldscursor;
1172 
1173     Address maybeAddress;
1174 
1175     // no, it's not a single mailbox. Try if it's a group:
1176     if (!parseGroup(scursor, send, maybeAddress, isCRLF)) {
1177         return false;
1178     }
1179 
1180     result = maybeAddress;
1181     return true;
1182 }
1183 
1184 bool parseAddressList(const char *&scursor, const char *const send,
1185                       AddressList &result, bool isCRLF)
1186 {
1187     while (scursor != send) {
1188         eatCFWS(scursor, send, isCRLF);
1189         // end of header: this is OK.
1190         if (scursor == send) {
1191             return true;
1192         }
1193         // empty entry: ignore:
1194         if (*scursor == ',') {
1195             scursor++;
1196             continue;
1197         }
1198         // broken clients might use ';' as list delimiter, accept that as well
1199         if (*scursor == ';') {
1200             scursor++;
1201             continue;
1202         }
1203 
1204         // parse one entry
1205         Address maybeAddress;
1206         if (!parseAddress(scursor, send, maybeAddress, isCRLF)) {
1207             return false;
1208         }
1209         result.append(maybeAddress);
1210 
1211         eatCFWS(scursor, send, isCRLF);
1212         // end of header: this is OK.
1213         if (scursor == send) {
1214             return true;
1215         }
1216         // comma separating entries: eat it.
1217         if (*scursor == ',') {
1218             scursor++;
1219         }
1220     }
1221     return true;
1222 }
1223 
1224 static bool parseParameter(const char *&scursor, const char *const send,
1225                            QPair<QString, QStringOrQPair> &result, bool isCRLF)
1226 {
1227     // parameter = regular-parameter / extended-parameter
1228     // regular-parameter = regular-parameter-name "=" value
1229     // extended-parameter =
1230     // value = token / quoted-string
1231     //
1232     // note that rfc2231 handling is out of the scope of this function.
1233     // Therefore we return the attribute as QByteArray and the value as
1234     // (start,length) tuple if we see that the value is encoded
1235     // (trailing asterisk), for parseParameterList to decode...
1236 
1237     eatCFWS(scursor, send, isCRLF);
1238     if (scursor == send) {
1239         return false;
1240     }
1241 
1242     //
1243     // parse the parameter name:
1244     //
1245     QByteArray tmpAttr;
1246     if (!parseToken(scursor, send, tmpAttr, ParseTokenNoFlag)) {
1247         return false;
1248     }
1249     // FIXME: we could use QMap<QByteArray, ...> in the API for parameters
1250     QString maybeAttribute = QString::fromLatin1(tmpAttr);
1251 
1252     eatCFWS(scursor, send, isCRLF);
1253     // premature end: not OK (haven't seen '=' yet).
1254     if (scursor == send || *scursor != '=') {
1255         return false;
1256     }
1257     scursor++; // eat '='
1258 
1259     eatCFWS(scursor, send, isCRLF);
1260     if (scursor == send) {
1261         // don't choke on attribute=, meaning the value was omitted:
1262         if (maybeAttribute.endsWith(QLatin1Char('*'))) {
1263             KMIME_WARN << "attribute ends with \"*\", but value is empty!"
1264                        "Chopping away \"*\".";
1265             maybeAttribute.chop(1);
1266         }
1267         result = qMakePair(maybeAttribute.toLower(), QStringOrQPair());
1268         return true;
1269     }
1270 
1271     const char *oldscursor = scursor;
1272 
1273     //
1274     // parse the parameter value:
1275     //
1276     QStringOrQPair maybeValue;
1277     if (*scursor == '"') {
1278         // value is a quoted-string:
1279         scursor++;
1280         if (maybeAttribute.endsWith(QLatin1Char('*'))) {
1281             // attributes ending with "*" designate extended-parameters,
1282             // which cannot have quoted-strings as values. So we remove the
1283             // trailing "*" to not confuse upper layers.
1284             KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!"
1285                        "Chopping away \"*\".";
1286             maybeAttribute.chop(1);
1287         }
1288 
1289         if (!parseGenericQuotedString(scursor, send, maybeValue.qstring, isCRLF)) {
1290             scursor = oldscursor;
1291             result = qMakePair(maybeAttribute.toLower(), QStringOrQPair());
1292             return false; // this case needs further processing by upper layers!!
1293         }
1294     } else {
1295         // value is a token:
1296         if (!parseToken(scursor, send, maybeValue.qpair, ParseTokenRelaxedTText)) {
1297             scursor = oldscursor;
1298             result = qMakePair(maybeAttribute.toLower(), QStringOrQPair());
1299             return false; // this case needs further processing by upper layers!!
1300         }
1301     }
1302 
1303     result = qMakePair(maybeAttribute.toLower(), maybeValue);
1304     return true;
1305 }
1306 
1307 static bool parseRawParameterList(const char *&scursor, const char *const send,
1308                                   QMap<QString, QStringOrQPair> &result,
1309                                   bool isCRLF)
1310 {
1311     // we use parseParameter() consecutively to obtain a map of raw
1312     // attributes to raw values. "Raw" here means that we don't do
1313     // rfc2231 decoding and concatenation. This is left to
1314     // parseParameterList(), which will call this function.
1315     //
1316     // The main reason for making this chunk of code a separate
1317     // (private) method is that we can deal with broken parameters
1318     // _here_ and leave the rfc2231 handling solely to
1319     // parseParameterList(), which will still be enough work.
1320     while (scursor != send) {
1321         eatCFWS(scursor, send, isCRLF);
1322         // empty entry ending the list: OK.
1323         if (scursor == send) {
1324             return true;
1325         }
1326         // empty list entry: ignore.
1327         if (*scursor == ';') {
1328             scursor++;
1329             continue;
1330         }
1331         QPair<QString, QStringOrQPair> maybeParameter;
1332         if (!parseParameter(scursor, send, maybeParameter, isCRLF)) {
1333             // we need to do a bit of work if the attribute is not
1334             // NULL. These are the cases marked with "needs further
1335             // processing" in parseParameter(). Specifically, parsing of the
1336             // token or the quoted-string, which should represent the value,
1337             // failed. We take the easy way out and simply search for the
1338             // next ';' to start parsing again. (Another option would be to
1339             // take the text between '=' and ';' as value)
1340             if (maybeParameter.first.isNull()) {
1341                 return false;
1342             }
1343             while (scursor != send) {
1344                 if (*scursor++ == ';') {
1345                     goto IS_SEMICOLON;
1346                 }
1347             }
1348             // scursor == send case: end of list.
1349             return true;
1350         IS_SEMICOLON:
1351             // *scursor == ';' case: parse next entry.
1352             continue;
1353         }
1354         // successful parsing brings us here:
1355         result.insert(maybeParameter.first, maybeParameter.second);
1356 
1357         eatCFWS(scursor, send, isCRLF);
1358         // end of header: ends list.
1359         if (scursor == send) {
1360             return true;
1361         }
1362         // regular separator: eat it.
1363         if (*scursor == ';') {
1364             scursor++;
1365         }
1366     }
1367     return true;
1368 }
1369 
// Decodes one rfc2231 extended parameter value section and appends the
// decoded text to `value`.
//
// rfc2231Codec    in/out: looked up by name ("x-kmime-rfc2231") on first use
//                 when null, and left set for subsequent calls.
// textcodec       in/out: for an initial section it is set from the charset
//                 named in the value (nullptr if that charset is unknown);
//                 a continuation uses whatever the caller passes in.
// isContinuation  false for a section of the form charset'language'text,
//                 true for a section that consists of text only.
// value           the decoded text is appended here.
// source          (start, length) of the raw section.
// charset         set to the charset name found in an initial section.
//
// NOTE(review): when an initial section has no single quote at all, the
// function returns before touching `textcodec` and `charset`, so both keep
// whatever the caller passed in - confirm that this is intended.
static void decodeRFC2231Value(KCodecs::Codec *&rfc2231Codec,
                               QTextCodec *&textcodec,
                               bool isContinuation, QString &value,
                               QPair<const char *, int> &source, QByteArray &charset)
{
    //
    // parse the raw value into (charset,language,text):
    //

    const char *decBegin = source.first;
    const char *decCursor = decBegin;
    const char *decEnd = decCursor + source.second;

    if (!isContinuation) {
        // find the first single quote
        while (decCursor != decEnd) {
            if (*decCursor == '\'') {
                break;
            } else {
                decCursor++;
            }
        }

        if (decCursor == decEnd) {
            // there wasn't a single single quote at all!
            // take the whole value to be in latin-1:
            KMIME_WARN << "No charset in extended-initial-value."
                       "Assuming \"iso-8859-1\".";
            value += QString::fromLatin1(decBegin, source.second);
            return;
        }

        // everything in front of the first quote is the charset name:
        charset = QByteArray(decBegin, decCursor - decBegin);

        // step over the first quote and remember where the language tag
        // (or, if the second quote is missing, the text) starts:
        const char *oldDecCursor = ++decCursor;
        // find the second single quote (we ignore the language tag):
        while (decCursor != decEnd) {
            if (*decCursor == '\'') {
                break;
            } else {
                decCursor++;
            }
        }
        if (decCursor == decEnd) {
            // no second quote: treat everything after the first quote as text
            KMIME_WARN << "No language in extended-initial-value."
                       "Trying to recover.";
            decCursor = oldDecCursor;
        } else {
            // step over the second quote
            decCursor++;
        }

        // decCursor now points to the start of the
        // "extended-other-values":

        //
        // get the decoders:
        //

        bool matchOK = false;
        textcodec = KCharsets::charsets()->codecForName(QLatin1String(charset), matchOK);
        if (!matchOK) {
            textcodec = nullptr;
            KMIME_WARN_UNKNOWN(Charset, charset);
        }
    }

    // look the rfc2231 codec up only once; the caller keeps the pointer
    if (!rfc2231Codec) {
        rfc2231Codec = KCodecs::Codec::codecForName("x-kmime-rfc2231");
        assert(rfc2231Codec);
    }

    if (!textcodec) {
        // no usable charset: append the remaining bytes as latin-1, without
        // running them through the rfc2231 decoder
        value += QString::fromLatin1(decCursor, decEnd - decCursor);
        return;
    }

    KCodecs::Decoder *dec = rfc2231Codec->makeDecoder();
    assert(dec);

    //
    // do the decoding:
    //

    QByteArray buffer;
    buffer.resize(rfc2231Codec->maxDecodedSizeFor(decEnd - decCursor));
    QByteArray::Iterator bit = buffer.begin();
    QByteArray::ConstIterator bend = buffer.end();

    if (!dec->decode(decCursor, decEnd, bit, bend)) {
        KMIME_WARN << rfc2231Codec->name()
                   << "codec lies about its maxDecodedSizeFor()"
                   << Qt::endl
                   << "result may be truncated";
    }

    // `bit` was advanced by decode(); convert only the bytes written so far
    value += textcodec->toUnicode(buffer.begin(), bit - buffer.begin());

    // qCDebug(KMIME_LOG) << "value now: \"" << value << "\"";
    // cleanup:
    delete dec;
}
1471 
1472 // known issues:
1473 //  - permutes rfc2231 continuations when the total number of parts
1474 //    exceeds 10 (other-sections then becomes *xy, ie. two digits)
1475 
1476 bool parseParameterListWithCharset(const char *&scursor,
1477                                    const char *const send,
1478                                    QMap<QString, QString> &result,
1479                                    QByteArray &charset, bool isCRLF)
1480 {
1481 // parse the list into raw attribute-value pairs:
1482     QMap<QString, QStringOrQPair> rawParameterList;
1483     if (!parseRawParameterList(scursor, send, rawParameterList, isCRLF)) {
1484         return false;
1485     }
1486 
1487     if (rawParameterList.isEmpty()) {
1488         return true;
1489     }
1490 
1491     // decode rfc 2231 continuations and alternate charset encoding:
1492 
1493     // NOTE: this code assumes that what QMapIterator delivers is sorted
1494     // by the key!
1495 
1496     KCodecs::Codec *rfc2231Codec = nullptr;
1497     QTextCodec *textcodec = nullptr;
1498     QString attribute;
1499     QString value;
1500     enum Mode {
1501         NoMode = 0x0, Continued = 0x1, Encoded = 0x2
1502     };
1503 
1504     enum EncodingMode {
1505         NoEncoding,
1506         RFC2047,
1507         RFC2231
1508     };
1509 
1510     QMap<QString, QStringOrQPair>::Iterator it;
1511     QMap<QString, QStringOrQPair>::Iterator end = rawParameterList.end();
1512 
1513     for (it = rawParameterList.begin() ; it != end ; ++it) {
1514         if (attribute.isNull() || !it.key().startsWith(attribute)) {
1515             //
1516             // new attribute:
1517             //
1518 
1519             // store the last attribute/value pair in the result map now:
1520             if (!attribute.isNull()) {
1521                 result.insert(attribute, value);
1522             }
1523             // and extract the information from the new raw attribute:
1524             value.clear();
1525             attribute = it.key();
1526             int mode = NoMode;
1527             EncodingMode encodingMode = NoEncoding;
1528 
1529             // is the value rfc2331-encoded?
1530             if (attribute.endsWith(QLatin1Char('*'))) {
1531                 attribute.chop(1);
1532                 mode |= Encoded;
1533                 encodingMode = RFC2231;
1534             }
1535             // is the value rfc2047-encoded?
1536             if (!(*it).qstring.isNull() && (*it).qstring.contains(QLatin1String("=?"))) {
1537                 mode |= Encoded;
1538                 encodingMode = RFC2047;
1539             }
1540             // is the value continued?
1541             if (attribute.endsWith(QLatin1String("*0"))) {
1542                 attribute.chop(2);
1543                 mode |= Continued;
1544             }
1545             //
1546             // decode if necessary:
1547             //
1548             if (mode & Encoded) {
1549                 if (encodingMode == RFC2231) {
1550                     decodeRFC2231Value(rfc2231Codec, textcodec,
1551                                        false, /* isn't continuation */
1552                                        value, (*it).qpair, charset);
1553                 } else if (encodingMode == RFC2047) {
1554                     value += KCodecs::decodeRFC2047String((*it).qstring.toLatin1(), &charset);
1555                 }
1556             } else {
1557                 // not encoded.
1558                 if ((*it).qpair.first) {
1559                     value += QString::fromLatin1((*it).qpair.first, (*it).qpair.second);
1560                 } else {
1561                     value += (*it).qstring;
1562                 }
1563             }
1564 
1565             //
1566             // shortcut-processing when the value isn't encoded:
1567             //
1568 
1569             if (!(mode & Continued)) {
1570                 // save result already:
1571                 result.insert(attribute, value);
1572                 // force begin of a new attribute:
1573                 attribute.clear();
1574             }
1575         } else { // it.key().startsWith( attribute )
1576             //
1577             // continuation
1578             //
1579 
1580             // ignore the section and trust QMap to have sorted the keys:
1581             if (it.key().endsWith(QLatin1Char('*'))) {
1582                 // encoded
1583                 decodeRFC2231Value(rfc2231Codec, textcodec,
1584                                    true, /* is continuation */
1585                                    value, (*it).qpair, charset);
1586             } else {
1587                 // not encoded
1588                 if ((*it).qpair.first) {
1589                     value += QString::fromLatin1((*it).qpair.first, (*it).qpair.second);
1590                 } else {
1591                     value += (*it).qstring;
1592                 }
1593             }
1594         }
1595     }
1596     // write last attr/value pair:
1597     if (!attribute.isNull()) {
1598         result.insert(attribute, value);
1599     }
1600 
1601     return true;
1602 }
1603 
1604 bool parseParameterList(const char *&scursor, const char *const send,
1605                         QMap<QString, QString> &result, bool isCRLF)
1606 {
1607     QByteArray charset;
1608     return parseParameterListWithCharset(scursor, send, result, charset, isCRLF);
1609 }
1610 
// Three-letter day names, starting with Sunday.
static const char stdDayNames[][4] = {
    "Sun",
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
    "Sat"
};
// number of entries in stdDayNames
static const int stdDayNamesLen = sizeof(stdDayNames) / sizeof(stdDayNames[0]);
1615 
1616 static bool parseDayName(const char *&scursor, const char *const send)
1617 {
1618     // check bounds:
1619     if (send - scursor < 3) {
1620         return false;
1621     }
1622 
1623     for (int i = 0 ; i < stdDayNamesLen ; ++i) {
1624         if (qstrnicmp(scursor, stdDayNames[i], 3) == 0) {
1625             scursor += 3;
1626             // qCDebug(KMIME_LOG) << "found" << stdDayNames[i];
1627             return true;
1628         }
1629     }
1630 
1631     return false;
1632 }
1633 
// Three-letter month names, starting with January.
static const char stdMonthNames[][4] = {
    "Jan", "Feb", "Mar",
    "Apr", "May", "Jun",
    "Jul", "Aug", "Sep",
    "Oct", "Nov", "Dec"
};
// number of entries in stdMonthNames
static const int stdMonthNamesLen = sizeof(stdMonthNames) / sizeof(stdMonthNames[0]);
1640 
1641 static bool parseMonthName(const char *&scursor, const char *const send,
1642                            int &result)
1643 {
1644     // check bounds:
1645     if (send - scursor < 3) {
1646         return false;
1647     }
1648 
1649     for (result = 0 ; result < stdMonthNamesLen ; ++result) {
1650         if (qstrnicmp(scursor, stdMonthNames[result], 3) == 0) {
1651             scursor += 3;
1652             return true;
1653         }
1654     }
1655 
1656     // not found:
1657     return false;
1658 }
1659 
// Known alphabetic time zone names and their offset from GMT in seconds
// (positive values are east of GMT). Lookups take the first matching entry,
// so every name must appear only once.
static const struct {
    const char tzName[5];
    long int secsEastOfGMT;
} timeZones[] = {
    // rfc 822 timezones:
    { "GMT", 0 },
    { "UT", 0 },
    { "EDT", -4 * 3600 },
    { "EST", -5 * 3600 },
    // was a second "MST" entry, which shadowed the real MST below and left
    // CDT unknown:
    { "CDT", -5 * 3600 },
    { "CST", -6 * 3600 },
    { "MDT", -6 * 3600 },
    { "MST", -7 * 3600 },
    { "PDT", -7 * 3600 },
    { "PST", -8 * 3600 },
    // common, non-rfc-822 zones:
    { "CET", 1 * 3600 },
    { "MET", 1 * 3600 },
    { "UTC", 0 },
    { "CEST", 2 * 3600 },
    { "BST", 1 * 3600 },
    // rfc 822 military timezones:
    // NOTE(review): RFC 5322 says these were defined in a non-standard way
    // in RFC 822 and should be treated as -0000 - confirm whether the
    // offsets below should be kept.
    { "Z", 0 },
    { "A", -1 * 3600 },
    { "B", -2 * 3600 },
    { "C", -3 * 3600 },
    { "D", -4 * 3600 },
    { "E", -5 * 3600 },
    { "F", -6 * 3600 },
    { "G", -7 * 3600 },
    { "H", -8 * 3600 },
    { "I", -9 * 3600 },
    // J is not used!
    { "K", -10 * 3600 },
    { "L", -11 * 3600 },
    { "M", -12 * 3600 },
    { "N", 1 * 3600 },
    { "O", 2 * 3600 },
    { "P", 3 * 3600 },
    { "Q", 4 * 3600 },
    { "R", 5 * 3600 },
    { "S", 6 * 3600 },
    { "T", 7 * 3600 },
    { "U", 8 * 3600 },
    { "V", 9 * 3600 },
    { "W", 10 * 3600 },
    { "X", 11 * 3600 },
    { "Y", 12 * 3600 },
};
// number of entries in timeZones
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
1710 
1711 static bool parseAlphaNumericTimeZone(const char *&scursor,
1712                                       const char *const send,
1713                                       long int &secsEastOfGMT,
1714                                       bool &timeZoneKnown)
1715 {
1716     // allow the timezone to be wrapped in quotes; bug 260761
1717     if (scursor < send && *scursor == '"') {
1718         scursor++;
1719 
1720         if (scursor == send) {
1721             return false;
1722         }
1723     }
1724 
1725     QPair<const char *, int> maybeTimeZone(nullptr, 0);
1726     if (!parseToken(scursor, send, maybeTimeZone, ParseTokenNoFlag)) {
1727         return false;
1728     }
1729     for (int i = 0 ; i < timeZonesLen ; ++i) {
1730         if (qstrnicmp(timeZones[i].tzName,
1731                       maybeTimeZone.first, maybeTimeZone.second) == 0) {
1732             scursor += maybeTimeZone.second;
1733             secsEastOfGMT = timeZones[i].secsEastOfGMT;
1734             timeZoneKnown = true;
1735 
1736             if (scursor < send && *scursor == '"') {
1737                 scursor++;
1738             }
1739 
1740             return true;
1741         }
1742     }
1743 
1744     // don't choke just because we don't happen to know the time zone
1745     KMIME_WARN_UNKNOWN(time zone,
1746                        QByteArray(maybeTimeZone.first, maybeTimeZone.second));
1747     secsEastOfGMT = 0;
1748     timeZoneKnown = false;
1749     return true;
1750 }
1751 
// Parses a run of decimal digits starting at scursor.
//
// scursor  in/out cursor; advanced past all digits that were consumed.
// send     end of the input.
// result   receives the parsed number (0 if there are no digits). A number
//          too large for an int is clamped to the largest int value instead
//          of overflowing.
// Returns the number of digits consumed.
int parseDigits(const char *&scursor, const char *const send, int &result)
{
    // largest value an int can hold
    const int maxInt = static_cast<int>(~0u >> 1);

    result = 0;
    int digits = 0;
    // isdigit() must not be handed a negative value, so 8bit characters are
    // converted to unsigned char first.
    for (; scursor != send && isdigit(static_cast<unsigned char>(*scursor)) ; scursor++, digits++) {
        const int digit = int(*scursor - '0');
        if (result > (maxInt - digit) / 10) {
            // result * 10 + digit would overflow: saturate
            result = maxInt;
        } else {
            result = result * 10 + digit;
        }
    }
    return digits;
}
1763 
1764 static bool parseTimeOfDay(const char *&scursor, const char *const send,
1765                            int &hour, int &min, int &sec, bool isCRLF = false)
1766 {
1767     // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
1768 
1769     //
1770     // 2DIGIT representing "hour":
1771     //
1772     if (!parseDigits(scursor, send, hour)) {
1773         return false;
1774     }
1775 
1776     eatCFWS(scursor, send, isCRLF);
1777     if (scursor == send || *scursor != ':') {
1778         return false;
1779     }
1780     scursor++; // eat ':'
1781 
1782     eatCFWS(scursor, send, isCRLF);
1783     if (scursor == send) {
1784         return false;
1785     }
1786 
1787     //
1788     // 2DIGIT representing "minute":
1789     //
1790     if (!parseDigits(scursor, send, min)) {
1791         return false;
1792     }
1793 
1794     eatCFWS(scursor, send, isCRLF);
1795     if (scursor == send) {
1796         return true; // seconds are optional
1797     }
1798 
1799     //
1800     // let's see if we have a 2DIGIT representing "second":
1801     //
1802     if (*scursor == ':') {
1803         // yepp, there are seconds:
1804         scursor++; // eat ':'
1805         eatCFWS(scursor, send, isCRLF);
1806         if (scursor == send) {
1807             return false;
1808         }
1809 
1810         if (!parseDigits(scursor, send, sec)) {
1811             return false;
1812         }
1813     } else {
1814         sec = 0;
1815     }
1816 
1817     return true;
1818 }
1819 
1820 bool parseTime(const char *&scursor, const char *send,
1821                int &hour, int &min, int &sec, long int &secsEastOfGMT,
1822                bool &timeZoneKnown, bool isCRLF)
1823 {
1824     // time := time-of-day CFWS ( zone / obs-zone )
1825     //
1826     // obs-zone    := "UT" / "GMT" /
1827     //                "EST" / "EDT" / ; -0500 / -0400
1828     //                "CST" / "CDT" / ; -0600 / -0500
1829     //                "MST" / "MDT" / ; -0700 / -0600
1830     //                "PST" / "PDT" / ; -0800 / -0700
1831     //                "A"-"I" / "a"-"i" /
1832     //                "K"-"Z" / "k"-"z"
1833 
1834     eatCFWS(scursor, send, isCRLF);
1835     if (scursor == send) {
1836         return false;
1837     }
1838 
1839     if (!parseTimeOfDay(scursor, send, hour, min, sec, isCRLF)) {
1840         return false;
1841     }
1842 
1843     eatCFWS(scursor, send, isCRLF);
1844     // there might be no timezone but a year following
1845     if ((scursor == send) || isdigit(*scursor)) {
1846         timeZoneKnown = false;
1847         secsEastOfGMT = 0;
1848         return true; // allow missing timezone
1849     }
1850 
1851     timeZoneKnown = true;
1852     if (*scursor == '+' || *scursor == '-') {
1853         // remember and eat '-'/'+':
1854         const char sign = *scursor++;
1855         // numerical timezone:
1856         int maybeTimeZone;
1857         const int tzDigits = parseDigits(scursor, send, maybeTimeZone);
1858         if (tzDigits != 4) {
1859             // Allow timezones in 02:00 format
1860             if (tzDigits == 2 && scursor != send && *scursor == ':') {
1861                 scursor++;
1862                 int maybeTimeZone2;
1863                 if (parseDigits(scursor, send, maybeTimeZone2) != 2) {
1864                     return false;
1865                 }
1866                 maybeTimeZone = maybeTimeZone * 100 + maybeTimeZone2;
1867             } else {
1868                 return false;
1869             }
1870         }
1871         secsEastOfGMT = 60 * (maybeTimeZone / 100 * 60 + maybeTimeZone % 100);
1872         if (sign == '-') {
1873             secsEastOfGMT *= -1;
1874             if (secsEastOfGMT == 0) {
1875                 timeZoneKnown = false; // -0000 means indetermined tz
1876             }
1877         }
1878     } else {
1879         // maybe alphanumeric timezone:
1880         if (!parseAlphaNumericTimeZone(scursor, send, secsEastOfGMT, timeZoneKnown)) {
1881             return false;
1882         }
1883     }
1884     return true;
1885 }
1886 
1887 bool parseDateTime(const char *&scursor, const char *const send,
1888                    QDateTime &result, bool isCRLF)
1889 {
1890     // Parsing date-time; strict mode:
1891     //
1892     // date-time   := [ [CFWS] day-name [CFWS] "," ]                      ; wday
1893     // (expanded)     [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
1894     //                time
1895     //
1896     // day-name    := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
1897     // month-name  := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
1898     //                "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec"
1899 
1900     result = QDateTime();
1901 
1902     eatCFWS(scursor, send, isCRLF);
1903     if (scursor == send) {
1904         return false;
1905     }
1906 
1907     //
1908     // let's see if there's a day-of-week:
1909     //
1910     if (parseDayName(scursor, send)) {
1911         eatCFWS(scursor, send, isCRLF);
1912         if (scursor == send) {
1913             return false;
1914         }
1915         // day-name should be followed by ',' but we treat it as optional:
1916         if (*scursor == ',') {
1917             scursor++; // eat ','
1918             eatCFWS(scursor, send, isCRLF);
1919         }
1920     }
1921 
1922     int maybeMonth = -1;
1923     bool asctimeFormat = false;
1924 
1925     // ANSI-C asctime() format is: Wed Jun 30 21:49:08 1993
1926     if (!isdigit(*scursor) && parseMonthName(scursor, send, maybeMonth)) {
1927         asctimeFormat = true;
1928         eatCFWS(scursor, send, isCRLF);
1929     }
1930 
1931     //
1932     // 1*2DIGIT representing "day" (of month):
1933     //
1934     int maybeDay;
1935     if (!parseDigits(scursor, send, maybeDay)) {
1936         return false;
1937     }
1938 
1939     eatCFWS(scursor, send, isCRLF);
1940     if (scursor == send) {
1941         return false;
1942     }
1943 
1944     // ignore ","; bug 54098
1945     if (*scursor == ',') {
1946         scursor++;
1947     }
1948 
1949     //
1950     // month-name:
1951     //
1952     if (!asctimeFormat && !parseMonthName(scursor, send, maybeMonth)) {
1953         return false;
1954     }
1955     if (scursor == send) {
1956         return false;
1957     }
1958     assert(maybeMonth >= 0); assert(maybeMonth <= 11);
1959     ++maybeMonth; // 0-11 -> 1-12
1960 
1961     eatCFWS(scursor, send, isCRLF);
1962     if (scursor == send) {
1963         return false;
1964     }
1965 
1966     // check for "year HH:MM:SS" or only "HH:MM:SS" (or "H:MM:SS")
1967     bool timeAfterYear = true;
1968     if ((send - scursor > 3) && ((scursor[1] == ':') || (scursor[2] == ':'))) {
1969         timeAfterYear = false;  // first read time, then year
1970     }
1971 
1972     //
1973     // 2*DIGIT representing "year":
1974     //
1975     int maybeYear = 0;
1976 
1977     if (timeAfterYear && !parseDigits(scursor, send, maybeYear)) {
1978         return false;
1979     }
1980 
1981     eatCFWS(scursor, send, isCRLF);
1982     if (scursor == send) {
1983         return false;
1984     }
1985 
1986     //
1987     // time
1988     //
1989     int maybeHour;
1990     int maybeMinute;
1991     int maybeSecond;
1992     long int secsEastOfGMT;
1993     bool timeZoneKnown = true;
1994 
1995     if (!parseTime(scursor, send,
1996                    maybeHour, maybeMinute, maybeSecond,
1997                    secsEastOfGMT, timeZoneKnown, isCRLF)) {
1998         return false;
1999     }
2000 
2001     // in asctime() the year follows the time
2002     if (!timeAfterYear) {
2003         eatCFWS(scursor, send, isCRLF);
2004         if (scursor == send) {
2005             return false;
2006         }
2007 
2008         if (!parseDigits(scursor, send, maybeYear)) {
2009             return false;
2010         }
2011     }
2012 
2013     // RFC 2822 4.3 processing:
2014     if (maybeYear < 50) {
2015         maybeYear += 2000;
2016     } else if (maybeYear < 1000) {
2017         maybeYear += 1900;
2018     }
2019     // else keep as is
2020     if (maybeYear < 1900) {
2021         return false; // rfc2822, 3.3
2022     }
2023 
2024     const QDate maybeDate = QDate(maybeYear, maybeMonth, maybeDay);
2025     const QTime maybeTime = QTime(maybeHour, maybeMinute, maybeSecond);
2026 
2027     if (!maybeDate.isValid() || !maybeTime.isValid()) {
2028         return false;
2029     }
2030 
2031     result = QDateTime(maybeDate, maybeTime, Qt::OffsetFromUTC, secsEastOfGMT);
2032     //result = QDateTime( maybeDateTime, QDateTime::Spec( QDateTime::OffsetFromUTC, secsEastOfGMT ) );
2033     if (!result.isValid()) {
2034         return false;
2035     }
2036     return true;
2037 }
2038 
2039 namespace {
2040 
2041 Headers::Base *extractHeader(const QByteArray &head, const int headerStart, int &endOfFieldBody)
2042 {
2043     Headers::Base *header = {};
2044 
2045     int startOfFieldBody = head.indexOf(':', headerStart);
2046     if (startOfFieldBody < 0) {
2047         return nullptr;
2048     }
2049 
2050     const char *rawType = head.constData() + headerStart;
2051     const size_t rawTypeLen = startOfFieldBody - headerStart;
2052 
2053     startOfFieldBody++; //skip the ':'
2054     if (startOfFieldBody < head.size() - 1 &&  head[startOfFieldBody] == ' ') { // skip the space after the ':', if there's any
2055         startOfFieldBody++;
2056     }
2057 
2058     bool folded = false;
2059     endOfFieldBody = findHeaderLineEnd(head, startOfFieldBody, &folded);
2060 
2061     // We might get an invalid mail without a field name, don't crash on that.
2062     if (rawTypeLen > 0) {
2063         header = HeaderFactory::createHeader(rawType, rawTypeLen);
2064     }
2065     if (!header) {
2066         //qCWarning(KMIME_LOG)() << "Returning Generic header of type" << rawType;
2067         header = new Headers::Generic(rawType, rawTypeLen);
2068     }
2069     if (folded) {
2070         const auto unfoldedBody = unfoldHeader(head.constData() + startOfFieldBody, endOfFieldBody - startOfFieldBody);
2071         header->from7BitString(unfoldedBody);
2072     } else {
2073         header->from7BitString(head.constData() + startOfFieldBody, endOfFieldBody - startOfFieldBody);
2074     }
2075 
2076     return header;
2077 }
2078 
2079 }
2080 
2081 Headers::Base *extractFirstHeader(QByteArray &head)
2082 {
2083     int endOfFieldBody = 0;
2084     auto header = extractHeader(head, 0, endOfFieldBody);
2085     if (header) {
2086         head.remove(0, endOfFieldBody + 1);
2087     } else {
2088         head.clear();
2089     }
2090 
2091     return header;
2092 }
2093 
2094 void extractHeaderAndBody(const QByteArray &content, QByteArray &header, QByteArray &body)
2095 {
2096     header.clear();
2097     body.clear();
2098 
2099     // empty header
2100     if (content.startsWith('\n')) {
2101         body = content.right(content.length() - 1);
2102         return;
2103     }
2104 
2105     int pos = content.indexOf("\n\n", 0);
2106     if (pos > -1) {
2107         header = content.left(++pos);    //header *must* end with "\n" !!
2108         body = content.mid(pos + 1);
2109         if (body.startsWith("\n")) {
2110             body = "\n" + body;
2111         }
2112     } else {
2113         header = content;
2114     }
2115 }
2116 
2117 QVector<Headers::Base*> parseHeaders(const QByteArray &head)
2118 {
2119     QVector<Headers::Base*> ret;
2120 
2121     int cursor = 0;
2122     while (cursor < head.size()) {
2123         const int headerStart = cursor;
2124         int endOfFieldBody;
2125         if (auto header = extractHeader(head, headerStart, endOfFieldBody)) {
2126             ret << header;
2127             cursor = endOfFieldBody + 1;
2128         } else {
2129             break;
2130         }
2131     }
2132 
2133     return ret;
2134 }
2135 
2136 } // namespace HeaderParsing
2137 
2138 } // namespace KMime