File indexing completed on 2024-09-22 04:52:49

0001 /* Copyright (C) 2006 - 2014 Jan Kundrát <jkt@flaska.net>
0002    Copyright (C) 2018 Erik Quaeghebeur <kde@equaeghe.nospammail.net>
0003 
0004    This file is part of the Trojita Qt IMAP e-mail client,
0005    http://trojita.flaska.net/
0006 
0007    This program is free software; you can redistribute it and/or
0008    modify it under the terms of the GNU General Public License as
0009    published by the Free Software Foundation; either version 2 of
0010    the License or (at your option) version 3 or any later version
0011    accepted by the membership of KDE e.V. (or its successor approved
0012    by the membership of KDE e.V.), which shall act as a proxy
0013    defined in Section 14 of version 3 of the license.
0014 
0015    This program is distributed in the hope that it will be useful,
0016    but WITHOUT ANY WARRANTY; without even the implied warranty of
0017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0018    GNU General Public License for more details.
0019 
0020    You should have received a copy of the GNU General Public License
0021    along with this program.  If not, see <http://www.gnu.org/licenses/>.
0022 */
0023 
0024 #include <limits>
0025 #include <QDebug>
0026 #include <QMap>
0027 #include <QPair>
0028 #include <QRegularExpression>
0029 #include <QRegularExpressionMatch>
0030 #include <QStringList>
0031 #include <QVariant>
0032 #include <QDateTime>
0033 #include "LowLevelParser.h"
0034 #include "../Exceptions.h"
0035 #include "Imap/Encoders.h"
0036 
0037 namespace Imap
0038 {
0039 namespace LowLevelParser
0040 {
0041 
0042 template<typename T> T extractNumber(const QByteArray &line, int &start)
0043 {
0044     if (start >= line.size())
0045         throw NoData("extractNumber: no data", line, start);
0046 
0047     const char *c_str = line.constData();
0048     c_str += start;
0049 
0050     if (*c_str < '0' || *c_str > '9')
0051         throw ParseError("extractNumber: not a number", line, start);
0052 
0053     T res = 0;
0054     // well, it's an inline function, but clang still won't cache its result by default
0055     const T absoluteMax = std::numeric_limits<T>::max();
0056     const T softLimit = (absoluteMax - 10) / 10;
0057     while (*c_str >= '0' && *c_str <= '9') {
0058         auto digit = *c_str - '0';
0059         if (res <= softLimit) {
0060             res *= 10;
0061             res += digit;
0062         } else {
0063             if (res > absoluteMax / 10)
0064                 throw ParseError("extractNumber: out of range", line, start);
0065             res *= 10;
0066             if (res > absoluteMax - digit)
0067                 throw ParseError("extractNumber: out of range", line, start);
0068             res += digit;
0069         }
0070         ++c_str;
0071         ++start;
0072     }
0073     return res;
0074 }
0075 
0076 uint getUInt(const QByteArray &line, int &start)
0077 {
0078     return extractNumber<uint>(line, start);
0079 }
0080 
0081 quint64 getUInt64(const QByteArray &line, int &start)
0082 {
0083     return extractNumber<quint64>(line, start);
0084 }
0085 
/** @short Expression which is true if the byte pointed to by @c c_str may be a part of an atom

The macro expects a pointer called @c c_str to be in scope at the place where it is used.
A null byte is never accepted because it does not compare greater than 0x20, which is why the
scanning loops below stop at the terminator of the buffer. Where plain char is signed, bytes
above 0x7f compare below 0x20 as well and are rejected, too.

The whole expression is wrapped in parentheses so that it can be safely negated or combined
with other operators at the place of use.
*/
#define C_STR_CHECK_FOR_ATOM_CHARS \
    (*c_str > '\x20' && *c_str != '\x7f' /* SP and CTL */ \
        && *c_str != '(' && *c_str != ')' && *c_str != '{' /* explicitly forbidden */ \
        && *c_str != '%' && *c_str != '*' /* list-wildcards */ \
        && *c_str != '"' && *c_str != '\\' /* quoted-specials */ \
        && *c_str != ']' /* resp-specials */)
0092 
0093 bool startsWithNil(const QByteArray &line, int start)
0094 {
0095     const char *c_str = line.constData();
0096     c_str += start;
0097     // Case-insensitive NIL. We cannot use strncasecmp because that function respects locale settings which
0098     // is absolutely not something we want to do here.
0099     if (!(start <= line.size() + 3 && (*c_str == 'N' || *c_str == 'n') && (*(c_str+1) == 'I' || *(c_str+1) == 'i')
0100             && (*(c_str+2) == 'L' || *(c_str+2) == 'l'))) {
0101         return false;
0102     }
0103     // At this point we know that it starts with a NIL. To prevent parsing ambiguity with atoms, we have to
0104     // check the next character.
0105     c_str += 3;
0106     // That macro already checks for NULL bytes and the input is guaranteed to be null-terminated, so we're safe here
0107     if (C_STR_CHECK_FOR_ATOM_CHARS) {
0108         // The next character is apparently a valid atom-char, so this cannot possibly be a NIL
0109         return false;
0110     }
0111     return true;
0112 }
0113 
0114 QByteArray getAtom(const QByteArray &line, int &start)
0115 {
0116     if (start == line.size())
0117         throw NoData("getAtom: no data", line, start);
0118 
0119     const char *c_str = line.constData();
0120     c_str += start;
0121     const char * const old_str = c_str;
0122 
0123     while (C_STR_CHECK_FOR_ATOM_CHARS) {
0124         ++c_str;
0125     }
0126 
0127     auto size = c_str - old_str;
0128     if (!size)
0129         throw ParseError("getAtom: did not read anything", line, start);
0130     start += size;
0131     return QByteArray(old_str, size);
0132 }
0133 
0134 /** @short Special variation of getAtom which also accepts leading backslash */
0135 QByteArray getPossiblyBackslashedAtom(const QByteArray &line, int &start)
0136 {
0137     if (start == line.size())
0138         throw NoData("getPossiblyBackslashedAtom: no data", line, start);
0139 
0140     const char *c_str = line.constData();
0141     c_str += start;
0142     const char * const old_str = c_str;
0143 
0144     if (*c_str == '\\')
0145         ++c_str;
0146 
0147     while (C_STR_CHECK_FOR_ATOM_CHARS) {
0148         ++c_str;
0149     }
0150 
0151     auto size = c_str - old_str;
0152     if (!size)
0153         throw ParseError("getPossiblyBackslashedAtom: did not read anything", line, start);
0154     start += size;
0155     return QByteArray(old_str, size);
0156 }
0157 
0158 QPair<QByteArray,ParsedAs> getString(const QByteArray &line, int &start)
0159 {
0160     if (start == line.size())
0161         throw NoData("getString: no data", line, start);
0162 
0163     if (line[start] == '"') {
0164         // quoted string
0165         ++start;
0166         bool escaping = false;
0167         QByteArray res;
0168         bool terminated = false;
0169         while (start != line.size() && !terminated) {
0170             if (escaping) {
0171                 escaping = false;
0172                 if (line[start] == '"' || line[start] == '\\') {
0173                     res.append(line[start]);
0174                 } else if (line[start] == '(' || line[start] == ')') {
0175                     // Got to support broken IMAP servers like Groupwise.
0176                     // See https://bugs.kde.org/show_bug.cgi?id=334456
0177                     res.append(line[start]);
0178                     // FIXME: change this to parser warning when they're implemented
0179                     qDebug() << "IMAP parser: quoted-string escapes something else than quoted-specials";
0180                 } else {
0181                     throw UnexpectedHere("getString: escaping invalid character", line, start);
0182                 }
0183             } else {
0184                 switch (line[start]) {
0185                 case '"':
0186                     terminated = true;
0187                     break;
0188                 case '\\':
0189                     escaping = true;
0190                     break;
0191                 case '\r': case '\n':
0192                     throw ParseError("getString: premature end of quoted string", line, start);
0193                 default:
0194                     res.append(line[start]);
0195                 }
0196             }
0197             ++start;
0198         }
0199         if (!terminated)
0200             throw NoData("getString: unterminated quoted string", line, start);
0201         return qMakePair(res, QUOTED);
0202     } else if (line[start] == '{') {
0203         // literal
0204         ++start;
0205         int size = getUInt(line, start);
0206         if (line.mid(start, 3) != "}\r\n")
0207             throw ParseError("getString: malformed literal specification", line, start);
0208         start += 3;
0209         if (start + size > line.size())
0210             throw NoData("getString: run out of data", line, start);
0211         int old(start);
0212         start += size;
0213         return qMakePair(line.mid(old, size), LITERAL);
0214     } else if (start < line.size() - 3 && line[start] == '~' && line[start + 1] == '{' ) {
0215         // literal8
0216         start += 2;
0217         int size = getUInt(line, start);
0218         if (line.mid(start, 3) != "}\r\n")
0219             throw ParseError("getString: malformed literal8 specification", line, start);
0220         start += 3;
0221         if (start + size > line.size())
0222             throw NoData("getString: literal8: run out of data", line, start);
0223         int old(start);
0224         start += size;
0225         return qMakePair(line.mid(old, size), LITERAL8);
0226     } else {
0227         throw UnexpectedHere("getString: did not get quoted string or literal", line, start);
0228     }
0229 }
0230 
0231 QPair<QByteArray,ParsedAs> getAString(const QByteArray &line, int &start)
0232 {
0233     if (start >= line.size())
0234         throw NoData("getAString: no data", line, start);
0235 
0236     if (line[start] == '{' || line[start] == '"' || line[start] == '~') {
0237         return getString(line, start);
0238     } else {
0239         const char *c_str = line.constData();
0240         c_str += start;
0241         const char * const old_str = c_str;
0242         bool gotRespSpecials = false;
0243 
0244         while (true) {
0245             while (C_STR_CHECK_FOR_ATOM_CHARS) {
0246                 ++c_str;
0247             }
0248             if (*c_str == ']' /* got to explicitly allow resp-specials again...*/ ) {
0249                 ++c_str;
0250                 gotRespSpecials = true;
0251                 continue;
0252             } else {
0253                 break;
0254             }
0255         }
0256 
0257         auto size = c_str - old_str;
0258         if (!size)
0259             throw ParseError("getAString: did not read anything", line, start);
0260         start += size;
0261         return qMakePair(QByteArray(old_str, size), gotRespSpecials ? ASTRING : ATOM);
0262     }
0263 }
0264 
0265 QPair<QByteArray,ParsedAs> getNString(const QByteArray &line, int &start)
0266 {
0267     if (startsWithNil(line, start)) {
0268         start += 3;
0269         return qMakePair<>(QByteArray(), NIL);
0270     } else {
0271         return getAString(line, start);
0272     }
0273 }
0274 
0275 QString getMailbox(const QByteArray &line, int &start)
0276 {
0277     QPair<QByteArray,ParsedAs> r = getAString(line, start);
0278     if (r.first.toUpper() == "INBOX")
0279         return QStringLiteral("INBOX");
0280     else
0281         return decodeImapFolderName(r.first);
0282 
0283 }
0284 
0285 QVariantList parseList(const char open, const char close, const QByteArray &line, int &start)
0286 {
0287     if (start >= line.size())
0288         throw NoData("Could not parse list: no more data", line, start);
0289 
0290     if (line[start] == open) {
0291         // found the opening parenthesis
0292         ++start;
0293         if (start >= line.size())
0294             throw NoData("Could not parse list: just the opening bracket", line, start);
0295 
0296         QVariantList res;
0297         if (line[start] == close) {
0298             ++start;
0299             return res;
0300         }
0301         while (line[start] != close) {
0302             // We want to be benevolent here and eat extra whitespace
0303             eatSpaces(line, start);
0304             res.append(getAnything(line, start));
0305             if (start >= line.size())
0306                 throw NoData("Could not parse list: truncated data", line, start);
0307             // Eat whitespace after each token, too
0308             eatSpaces(line, start);
0309             if (line[start] == close) {
0310                 ++start;
0311                 return res;
0312             }
0313         }
0314         return res;
0315     } else {
0316         throw UnexpectedHere(std::string("Could not parse list: expected a list enclosed in ")
0317                              + open + close + ", but got something else instead", line, start);
0318     }
0319 }
0320 
0321 QVariant getAnything(const QByteArray &line, int &start)
0322 {
0323     if (start >= line.size())
0324         throw NoData("getAnything: no data", line, start);
0325 
0326     if (line[start] == '[') {
0327         QVariant res = parseList('[', ']', line, start);
0328         return res;
0329     } else if (line[start] == '(') {
0330         QVariant res = parseList('(', ')', line, start);
0331         return res;
0332     } else if (line[start] == '"' || line[start] == '{' || line[start] == '~') {
0333         QPair<QByteArray,ParsedAs> res = getString(line, start);
0334         return res.first;
0335     } else if (startsWithNil(line, start)) {
0336         start += 3;
0337         return QByteArray();
0338     } else if (line[start] == '\\') {
0339         // valid for "flag"
0340         ++start;
0341         if (start >= line.size())
0342             throw NoData("getAnything: backslash-nothing is invalid", line, start);
0343         if (line[start] == '*') {
0344             ++start;
0345             return QByteArray("\\*");
0346         }
0347         return QByteArray(QByteArray(1, '\\') + getAtom(line, start));
0348     } else {
0349         QByteArray atom = getAtom(line, start);
0350         if (atom.indexOf('[', 0) != -1) {
0351             // "BODY[something]" -- there's no whitespace between "[" and
0352             // next atom...
0353             int pos = line.indexOf(']', start);
0354             if (pos == -1)
0355                 throw ParseError("getAnything: can't find ']' for the '['", line, start);
0356             ++pos;
0357             atom += line.mid(start, pos - start);
0358             start = pos;
0359             if (start < line.size() && line[start] == '<') {
0360                 // Let's check if it continues with "<range>"
0361                 pos = line.indexOf('>', start);
0362                 if (pos == -1)
0363                     throw ParseError("getAnything: can't find proper <range>", line, start);
0364                 ++pos;
0365                 atom += line.mid(start, pos - start);
0366                 start = pos;
0367             }
0368         }
0369         return atom;
0370     }
0371 }
0372 
0373 Imap::Uids getSequence(const QByteArray &line, int &start)
0374 {
0375     uint num = LowLevelParser::getUInt(line, start);
0376     if (start >= line.size() - 2) {
0377         // It's definitely just a number because there's no more data in here
0378         return Imap::Uids() << num;
0379     } else {
0380         Imap::Uids numbers;
0381         numbers << num;
0382 
0383         enum {COMMA, RANGE} currentType = COMMA;
0384 
0385         // Try to find further items in the sequence set
0386         while (line[start] == ':' || line[start] == ',') {
0387             // it's a sequence set
0388 
0389             if (line[start] == ':') {
0390                 if (currentType == RANGE) {
0391                     // Now "x:y:z" is a funny syntax
0392                     throw UnexpectedHere("Sequence set: range cannot me defined by three numbers", line, start);
0393                 }
0394                 currentType = RANGE;
0395             } else {
0396                 currentType = COMMA;
0397             }
0398 
0399             ++start;
0400             if (start >= line.size() - 2) throw NoData("Truncated sequence set", line, start);
0401 
0402             uint num = LowLevelParser::getUInt(line, start);
0403             if (currentType == COMMA) {
0404                 // just adding one more to the set
0405                 numbers << num;
0406             } else {
0407                 // working with a range
0408                 if (numbers.last() >= num)
0409                     throw UnexpectedHere("Sequence set contains an invalid range. "
0410                                          "First item of a range must always be smaller than the second item.", line, start);
0411 
0412                 for (uint i = numbers.last() + 1; i <= num; ++i)
0413                     numbers << i;
0414             }
0415         }
0416         return numbers;
0417     }
0418 }
0419 
0420 QDateTime parseRFC2822DateTime(const QByteArray &input)
0421 {
0422     static const QMap<QString, uint> monthnumbers({ // default value is 0
0423         {QLatin1String("jan"), 1}, {QLatin1String("feb"), 2},
0424         {QLatin1String("mar"), 3}, {QLatin1String("apr"), 4},
0425         {QLatin1String("may"), 5}, {QLatin1String("jun"), 6},
0426         {QLatin1String("jul"), 7}, {QLatin1String("aug"), 8},
0427         {QLatin1String("sep"), 9}, {QLatin1String("oct"), 10},
0428         {QLatin1String("nov"), 11}, {QLatin1String("dec"), 12}
0429     });
0430 
0431     static const QMap<QString, int> tzoffsethrs({ // default value is 0
0432         {QLatin1String("EST"), 5}, {QLatin1String("EDT"), 4},
0433         {QLatin1String("CST"), 6}, {QLatin1String("CDT"), 5},
0434         {QLatin1String("MST"), 7}, {QLatin1String("MDT"), 6},
0435         {QLatin1String("PST"), 8}, {QLatin1String("PDT"), 7}
0436     });
0437 
0438     static const QRegularExpression rx(
0439               QLatin1String("^\\s*"
0440                             "(?:"
0441                                 "([A-Z][a-z]+)" // 1: day-of-week (may be empty)
0442                                 "\\s*,\\s*"
0443                             ")?"
0444                             "(\\d{1,2})" // 2: day
0445                             "\\s+"
0446                             "(") // 3: month
0447             +         QStringList(monthnumbers.keys()).join(QLatin1Char('|'))
0448                       // wrapping with QStringList because Qt 5.2 has no join for Qlist<QString>, unlike Qt >=5.5 (Qt 5.3-4?)
0449             + QLatin1String(")"
0450                             "\\s+"
0451                             "(\\d{2,4})" // 4: year
0452                             "\\s+"
0453                             "(\\d{1,2})" // 5: hours
0454                             "\\s*:\\s*"
0455                             "(\\d{1,2})" // 6: minutes
0456                             "(?:"
0457                                 "\\s*:\\s*"
0458                                 "(\\d{1,2})" // 7: seconds (may be empty)
0459                             ")?"
0460                             "(?:"
0461                                 "\\s+"
0462                                 "(?:" // timezone (some or all may be empty)
0463                                     "(" // 8: timezone offset
0464                                       "([+-]?)" // 9: offset direction
0465                                       "(\\d{2})" // 10: offset hours
0466                                       "(\\d{2})" // 11: offset minutes
0467                                     ")"
0468                                     "|"
0469                                     "(") // 12: timezone code
0470             +                QStringList(tzoffsethrs.keys()).join(QLatin1Char('|'))
0471                                          // codes not considered are ignored and implicitly assumed to correspond to UTC
0472                              // wrapping with QStringList because Qt 5.2 has no join for Qlist<QString>, unlike Qt >=5.5 (Qt 5.3-4?)
0473             + QLatin1String(        ")"
0474                                 ")"
0475                             ")?"
0476                             "\\s*"),
0477                             QRegularExpression::CaseInsensitiveOption);
0478 
0479     QRegularExpressionMatch match = rx.match(QString::fromUtf8(input));
0480     if (!match.hasMatch())
0481         throw ParseError("Date format not recognized");
0482 
0483     int year = match.captured(4).toInt();
0484     int month = monthnumbers[match.captured(3).toLower()];
0485     if (month == 0)
0486         throw ParseError("Invalid month name");
0487     int day = match.captured(2).toInt();
0488     int hours = match.captured(5).toInt();
0489     int minutes = match.captured(6).toInt();
0490     int seconds = match.captured(7).toInt();
0491     int shift(0);
0492     if (!match.captured(8).isEmpty()) {
0493         shift = (match.captured(10).toInt() * 60 + match.captured(11).toInt()) * 60;
0494         if (match.captured(9) != QLatin1String("-"))
0495             shift *= -1;
0496     } else if (!match.captured(12).isEmpty())
0497         shift = tzoffsethrs[match.captured(12).toUpper()] * 3600;
0498 
0499     return QDateTime(QDate(year, month, day), QTime(hours, minutes, seconds),
0500                      Qt::UTC).addSecs(shift); // TODO: perhaps use  Qt::OffsetFromUTC timespec instead to preserve more information
0501 }
0502 
0503 void eatSpaces(const QByteArray &line, int &start)
0504 {
0505     while (line.size() > start && line[start] == ' ')
0506         ++start;
0507 }
0508 
0509 }
0510 }