File indexing completed on 2024-09-22 04:52:49
0001 /* Copyright (C) 2006 - 2014 Jan Kundrát <jkt@flaska.net> 0002 Copyright (C) 2018 Erik Quaeghebeur <kde@equaeghe.nospammail.net> 0003 0004 This file is part of the Trojita Qt IMAP e-mail client, 0005 http://trojita.flaska.net/ 0006 0007 This program is free software; you can redistribute it and/or 0008 modify it under the terms of the GNU General Public License as 0009 published by the Free Software Foundation; either version 2 of 0010 the License or (at your option) version 3 or any later version 0011 accepted by the membership of KDE e.V. (or its successor approved 0012 by the membership of KDE e.V.), which shall act as a proxy 0013 defined in Section 14 of version 3 of the license. 0014 0015 This program is distributed in the hope that it will be useful, 0016 but WITHOUT ANY WARRANTY; without even the implied warranty of 0017 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 0018 GNU General Public License for more details. 0019 0020 You should have received a copy of the GNU General Public License 0021 along with this program. If not, see <http://www.gnu.org/licenses/>. 0022 */ 0023 0024 #include <limits> 0025 #include <QDebug> 0026 #include <QMap> 0027 #include <QPair> 0028 #include <QRegularExpression> 0029 #include <QRegularExpressionMatch> 0030 #include <QStringList> 0031 #include <QVariant> 0032 #include <QDateTime> 0033 #include "LowLevelParser.h" 0034 #include "../Exceptions.h" 0035 #include "Imap/Encoders.h" 0036 0037 namespace Imap 0038 { 0039 namespace LowLevelParser 0040 { 0041 0042 template<typename T> T extractNumber(const QByteArray &line, int &start) 0043 { 0044 if (start >= line.size()) 0045 throw NoData("extractNumber: no data", line, start); 0046 0047 const char *c_str = line.constData(); 0048 c_str += start; 0049 0050 if (*c_str < '0' || *c_str > '9') 0051 throw ParseError("extractNumber: not a number", line, start); 0052 0053 T res = 0; 0054 // well, it's an inline function, but clang still won't cache its result by default 0055 const T absoluteMax = std::numeric_limits<T>::max(); 0056 const T softLimit = (absoluteMax - 10) / 10; 0057 while (*c_str >= '0' && *c_str <= '9') { 0058 auto digit = *c_str - '0'; 0059 if (res <= softLimit) { 0060 res *= 10; 0061 res += digit; 0062 } else { 0063 if (res > absoluteMax / 10) 0064 throw ParseError("extractNumber: out of range", line, start); 0065 res *= 10; 0066 if (res > absoluteMax - digit) 0067 throw ParseError("extractNumber: out of range", line, start); 0068 res += digit; 0069 } 0070 ++c_str; 0071 ++start; 0072 } 0073 return res; 0074 } 0075 0076 uint getUInt(const QByteArray &line, int &start) 0077 { 0078 return extractNumber<uint>(line, start); 0079 } 0080 0081 quint64 getUInt64(const QByteArray &line, int &start) 0082 { 0083 return extractNumber<quint64>(line, start); 0084 } 0085 0086 #define C_STR_CHECK_FOR_ATOM_CHARS \ 0087 *c_str > '\x20' && *c_str != '\x7f' /* SP and CTL */ \ 0088 && *c_str != '(' && *c_str != ')' && *c_str != '{' /* explicitly forbidden */ \ 0089 && *c_str != '%' && *c_str != '*' /* list-wildcards */ \ 0090 && *c_str != '"' && *c_str != '\\' /* quoted-specials */ \ 0091 && *c_str != ']' /* resp-specials */ 0092 0093 bool startsWithNil(const QByteArray &line, int start) 0094 { 0095 const char *c_str = line.constData(); 0096 c_str += start; 0097 // Case-insensitive NIL. We cannot use strncasecmp because that function respects locale settings which 0098 // is absolutely not something we want to do here. 0099 if (!(start <= line.size() + 3 && (*c_str == 'N' || *c_str == 'n') && (*(c_str+1) == 'I' || *(c_str+1) == 'i') 0100 && (*(c_str+2) == 'L' || *(c_str+2) == 'l'))) { 0101 return false; 0102 } 0103 // At this point we know that it starts with a NIL. To prevent parsing ambiguity with atoms, we have to 0104 // check the next character. 0105 c_str += 3; 0106 // That macro already checks for NULL bytes and the input is guaranteed to be null-terminated, so we're safe here 0107 if (C_STR_CHECK_FOR_ATOM_CHARS) { 0108 // The next character is apparently a valid atom-char, so this cannot possibly be a NIL 0109 return false; 0110 } 0111 return true; 0112 } 0113 0114 QByteArray getAtom(const QByteArray &line, int &start) 0115 { 0116 if (start == line.size()) 0117 throw NoData("getAtom: no data", line, start); 0118 0119 const char *c_str = line.constData(); 0120 c_str += start; 0121 const char * const old_str = c_str; 0122 0123 while (C_STR_CHECK_FOR_ATOM_CHARS) { 0124 ++c_str; 0125 } 0126 0127 auto size = c_str - old_str; 0128 if (!size) 0129 throw ParseError("getAtom: did not read anything", line, start); 0130 start += size; 0131 return QByteArray(old_str, size); 0132 } 0133 0134 /** @short Special variation of getAtom which also accepts leading backslash */ 0135 QByteArray getPossiblyBackslashedAtom(const QByteArray &line, int &start) 0136 { 0137 if (start == line.size()) 0138 throw NoData("getPossiblyBackslashedAtom: no data", line, start); 0139 0140 const char *c_str = line.constData(); 0141 c_str += start; 0142 const char * const old_str = c_str; 0143 0144 if (*c_str == '\\') 0145 ++c_str; 0146 0147 while (C_STR_CHECK_FOR_ATOM_CHARS) { 0148 ++c_str; 0149 } 0150 0151 auto size = c_str - old_str; 0152 if (!size) 0153 throw ParseError("getPossiblyBackslashedAtom: did not read anything", line, start); 0154 start += size; 0155 return QByteArray(old_str, size); 0156 } 0157 0158 QPair<QByteArray,ParsedAs> getString(const QByteArray &line, int &start) 0159 { 0160 if (start == line.size()) 0161 throw NoData("getString: no data", line, start); 0162 0163 if (line[start] == '"') { 0164 // quoted string 0165 ++start; 0166 bool escaping = false; 0167 QByteArray res; 0168 bool terminated = false; 0169 while (start != line.size() && !terminated) { 0170 if (escaping) { 0171 escaping = false; 0172 if (line[start] == '"' || line[start] == '\\') { 0173 res.append(line[start]); 0174 } else if (line[start] == '(' || line[start] == ')') { 0175 // Got to support broken IMAP servers like Groupwise. 0176 // See https://bugs.kde.org/show_bug.cgi?id=334456 0177 res.append(line[start]); 0178 // FIXME: change this to parser warning when they're implemented 0179 qDebug() << "IMAP parser: quoted-string escapes something else than quoted-specials"; 0180 } else { 0181 throw UnexpectedHere("getString: escaping invalid character", line, start); 0182 } 0183 } else { 0184 switch (line[start]) { 0185 case '"': 0186 terminated = true; 0187 break; 0188 case '\\': 0189 escaping = true; 0190 break; 0191 case '\r': case '\n': 0192 throw ParseError("getString: premature end of quoted string", line, start); 0193 default: 0194 res.append(line[start]); 0195 } 0196 } 0197 ++start; 0198 } 0199 if (!terminated) 0200 throw NoData("getString: unterminated quoted string", line, start); 0201 return qMakePair(res, QUOTED); 0202 } else if (line[start] == '{') { 0203 // literal 0204 ++start; 0205 int size = getUInt(line, start); 0206 if (line.mid(start, 3) != "}\r\n") 0207 throw ParseError("getString: malformed literal specification", line, start); 0208 start += 3; 0209 if (start + size > line.size()) 0210 throw NoData("getString: run out of data", line, start); 0211 int old(start); 0212 start += size; 0213 return qMakePair(line.mid(old, size), LITERAL); 0214 } else if (start < line.size() - 3 && line[start] == '~' && line[start + 1] == '{' ) { 0215 // literal8 0216 start += 2; 0217 int size = getUInt(line, start); 0218 if (line.mid(start, 3) != "}\r\n") 0219 throw ParseError("getString: malformed literal8 specification", line, start); 0220 start += 3; 0221 if (start + size > line.size()) 0222 throw NoData("getString: literal8: run out of data", line, start); 0223 int old(start); 0224 start += size; 0225 return qMakePair(line.mid(old, size), LITERAL8); 0226 } else { 0227 throw UnexpectedHere("getString: did not get quoted string or literal", line, start); 0228 } 0229 } 0230 0231 QPair<QByteArray,ParsedAs> getAString(const QByteArray &line, int &start) 0232 { 0233 if (start >= line.size()) 0234 throw NoData("getAString: no data", line, start); 0235 0236 if (line[start] == '{' || line[start] == '"' || line[start] == '~') { 0237 return getString(line, start); 0238 } else { 0239 const char *c_str = line.constData(); 0240 c_str += start; 0241 const char * const old_str = c_str; 0242 bool gotRespSpecials = false; 0243 0244 while (true) { 0245 while (C_STR_CHECK_FOR_ATOM_CHARS) { 0246 ++c_str; 0247 } 0248 if (*c_str == ']' /* got to explicitly allow resp-specials again...*/ ) { 0249 ++c_str; 0250 gotRespSpecials = true; 0251 continue; 0252 } else { 0253 break; 0254 } 0255 } 0256 0257 auto size = c_str - old_str; 0258 if (!size) 0259 throw ParseError("getAString: did not read anything", line, start); 0260 start += size; 0261 return qMakePair(QByteArray(old_str, size), gotRespSpecials ? ASTRING : ATOM); 0262 } 0263 } 0264 0265 QPair<QByteArray,ParsedAs> getNString(const QByteArray &line, int &start) 0266 { 0267 if (startsWithNil(line, start)) { 0268 start += 3; 0269 return qMakePair<>(QByteArray(), NIL); 0270 } else { 0271 return getAString(line, start); 0272 } 0273 } 0274 0275 QString getMailbox(const QByteArray &line, int &start) 0276 { 0277 QPair<QByteArray,ParsedAs> r = getAString(line, start); 0278 if (r.first.toUpper() == "INBOX") 0279 return QStringLiteral("INBOX"); 0280 else 0281 return decodeImapFolderName(r.first); 0282 0283 } 0284 0285 QVariantList parseList(const char open, const char close, const QByteArray &line, int &start) 0286 { 0287 if (start >= line.size()) 0288 throw NoData("Could not parse list: no more data", line, start); 0289 0290 if (line[start] == open) { 0291 // found the opening parenthesis 0292 ++start; 0293 if (start >= line.size()) 0294 throw NoData("Could not parse list: just the opening bracket", line, start); 0295 0296 QVariantList res; 0297 if (line[start] == close) { 0298 ++start; 0299 return res; 0300 } 0301 while (line[start] != close) { 0302 // We want to be benevolent here and eat extra whitespace 0303 eatSpaces(line, start); 0304 res.append(getAnything(line, start)); 0305 if (start >= line.size()) 0306 throw NoData("Could not parse list: truncated data", line, start); 0307 // Eat whitespace after each token, too 0308 eatSpaces(line, start); 0309 if (line[start] == close) { 0310 ++start; 0311 return res; 0312 } 0313 } 0314 return res; 0315 } else { 0316 throw UnexpectedHere(std::string("Could not parse list: expected a list enclosed in ") 0317 + open + close + ", but got something else instead", line, start); 0318 } 0319 } 0320 0321 QVariant getAnything(const QByteArray &line, int &start) 0322 { 0323 if (start >= line.size()) 0324 throw NoData("getAnything: no data", line, start); 0325 0326 if (line[start] == '[') { 0327 QVariant res = parseList('[', ']', line, start); 0328 return res; 0329 } else if (line[start] == '(') { 0330 QVariant res = parseList('(', ')', line, start); 0331 return res; 0332 } else if (line[start] == '"' || line[start] == '{' || line[start] == '~') { 0333 QPair<QByteArray,ParsedAs> res = getString(line, start); 0334 return res.first; 0335 } else if (startsWithNil(line, start)) { 0336 start += 3; 0337 return QByteArray(); 0338 } else if (line[start] == '\\') { 0339 // valid for "flag" 0340 ++start; 0341 if (start >= line.size()) 0342 throw NoData("getAnything: backslash-nothing is invalid", line, start); 0343 if (line[start] == '*') { 0344 ++start; 0345 return QByteArray("\\*"); 0346 } 0347 return QByteArray(QByteArray(1, '\\') + getAtom(line, start)); 0348 } else { 0349 QByteArray atom = getAtom(line, start); 0350 if (atom.indexOf('[', 0) != -1) { 0351 // "BODY[something]" -- there's no whitespace between "[" and 0352 // next atom... 0353 int pos = line.indexOf(']', start); 0354 if (pos == -1) 0355 throw ParseError("getAnything: can't find ']' for the '['", line, start); 0356 ++pos; 0357 atom += line.mid(start, pos - start); 0358 start = pos; 0359 if (start < line.size() && line[start] == '<') { 0360 // Let's check if it continues with "<range>" 0361 pos = line.indexOf('>', start); 0362 if (pos == -1) 0363 throw ParseError("getAnything: can't find proper <range>", line, start); 0364 ++pos; 0365 atom += line.mid(start, pos - start); 0366 start = pos; 0367 } 0368 } 0369 return atom; 0370 } 0371 } 0372 0373 Imap::Uids getSequence(const QByteArray &line, int &start) 0374 { 0375 uint num = LowLevelParser::getUInt(line, start); 0376 if (start >= line.size() - 2) { 0377 // It's definitely just a number because there's no more data in here 0378 return Imap::Uids() << num; 0379 } else { 0380 Imap::Uids numbers; 0381 numbers << num; 0382 0383 enum {COMMA, RANGE} currentType = COMMA; 0384 0385 // Try to find further items in the sequence set 0386 while (line[start] == ':' || line[start] == ',') { 0387 // it's a sequence set 0388 0389 if (line[start] == ':') { 0390 if (currentType == RANGE) { 0391 // Now "x:y:z" is a funny syntax 0392 throw UnexpectedHere("Sequence set: range cannot me defined by three numbers", line, start); 0393 } 0394 currentType = RANGE; 0395 } else { 0396 currentType = COMMA; 0397 } 0398 0399 ++start; 0400 if (start >= line.size() - 2) throw NoData("Truncated sequence set", line, start); 0401 0402 uint num = LowLevelParser::getUInt(line, start); 0403 if (currentType == COMMA) { 0404 // just adding one more to the set 0405 numbers << num; 0406 } else { 0407 // working with a range 0408 if (numbers.last() >= num) 0409 throw UnexpectedHere("Sequence set contains an invalid range. " 0410 "First item of a range must always be smaller than the second item.", line, start); 0411 0412 for (uint i = numbers.last() + 1; i <= num; ++i) 0413 numbers << i; 0414 } 0415 } 0416 return numbers; 0417 } 0418 } 0419 0420 QDateTime parseRFC2822DateTime(const QByteArray &input) 0421 { 0422 static const QMap<QString, uint> monthnumbers({ // default value is 0 0423 {QLatin1String("jan"), 1}, {QLatin1String("feb"), 2}, 0424 {QLatin1String("mar"), 3}, {QLatin1String("apr"), 4}, 0425 {QLatin1String("may"), 5}, {QLatin1String("jun"), 6}, 0426 {QLatin1String("jul"), 7}, {QLatin1String("aug"), 8}, 0427 {QLatin1String("sep"), 9}, {QLatin1String("oct"), 10}, 0428 {QLatin1String("nov"), 11}, {QLatin1String("dec"), 12} 0429 }); 0430 0431 static const QMap<QString, int> tzoffsethrs({ // default value is 0 0432 {QLatin1String("EST"), 5}, {QLatin1String("EDT"), 4}, 0433 {QLatin1String("CST"), 6}, {QLatin1String("CDT"), 5}, 0434 {QLatin1String("MST"), 7}, {QLatin1String("MDT"), 6}, 0435 {QLatin1String("PST"), 8}, {QLatin1String("PDT"), 7} 0436 }); 0437 0438 static const QRegularExpression rx( 0439 QLatin1String("^\\s*" 0440 "(?:" 0441 "([A-Z][a-z]+)" // 1: day-of-week (may be empty) 0442 "\\s*,\\s*" 0443 ")?" 0444 "(\\d{1,2})" // 2: day 0445 "\\s+" 0446 "(") // 3: month 0447 + QStringList(monthnumbers.keys()).join(QLatin1Char('|')) 0448 // wrapping with QStringList because Qt 5.2 has no join for Qlist<QString>, unlike Qt >=5.5 (Qt 5.3-4?) 0449 + QLatin1String(")" 0450 "\\s+" 0451 "(\\d{2,4})" // 4: year 0452 "\\s+" 0453 "(\\d{1,2})" // 5: hours 0454 "\\s*:\\s*" 0455 "(\\d{1,2})" // 6: minutes 0456 "(?:" 0457 "\\s*:\\s*" 0458 "(\\d{1,2})" // 7: seconds (may be empty) 0459 ")?" 0460 "(?:" 0461 "\\s+" 0462 "(?:" // timezone (some or all may be empty) 0463 "(" // 8: timezone offset 0464 "([+-]?)" // 9: offset direction 0465 "(\\d{2})" // 10: offset hours 0466 "(\\d{2})" // 11: offset minutes 0467 ")" 0468 "|" 0469 "(") // 12: timezone code 0470 + QStringList(tzoffsethrs.keys()).join(QLatin1Char('|')) 0471 // codes not considered are ignored and implicitly assumed to correspond to UTC 0472 // wrapping with QStringList because Qt 5.2 has no join for Qlist<QString>, unlike Qt >=5.5 (Qt 5.3-4?) 0473 + QLatin1String( ")" 0474 ")" 0475 ")?" 0476 "\\s*"), 0477 QRegularExpression::CaseInsensitiveOption); 0478 0479 QRegularExpressionMatch match = rx.match(QString::fromUtf8(input)); 0480 if (!match.hasMatch()) 0481 throw ParseError("Date format not recognized"); 0482 0483 int year = match.captured(4).toInt(); 0484 int month = monthnumbers[match.captured(3).toLower()]; 0485 if (month == 0) 0486 throw ParseError("Invalid month name"); 0487 int day = match.captured(2).toInt(); 0488 int hours = match.captured(5).toInt(); 0489 int minutes = match.captured(6).toInt(); 0490 int seconds = match.captured(7).toInt(); 0491 int shift(0); 0492 if (!match.captured(8).isEmpty()) { 0493 shift = (match.captured(10).toInt() * 60 + match.captured(11).toInt()) * 60; 0494 if (match.captured(9) != QLatin1String("-")) 0495 shift *= -1; 0496 } else if (!match.captured(12).isEmpty()) 0497 shift = tzoffsethrs[match.captured(12).toUpper()] * 3600; 0498 0499 return QDateTime(QDate(year, month, day), QTime(hours, minutes, seconds), 0500 Qt::UTC).addSecs(shift); // TODO: perhaps use Qt::OffsetFromUTC timespec instead to preserve more information 0501 } 0502 0503 void eatSpaces(const QByteArray &line, int &start) 0504 { 0505 while (line.size() > start && line[start] == ' ') 0506 ++start; 0507 } 0508 0509 } 0510 }