File indexing completed on 2025-10-26 04:59:25
0001 /* 0002 KMime, the KDE Internet mail/usenet news message library. 0003 SPDX-FileCopyrightText: 2001 the KMime authors. 0004 See file AUTHORS for details 0005 0006 SPDX-License-Identifier: LGPL-2.0-or-later 0007 */ 0008 0009 #include <config-kmime.h> 0010 0011 #include "kmime_util_p.h" 0012 #include "kmime_debug.h" 0013 0014 #include <QByteArray> 0015 #include <QChar> 0016 #include <QString> 0017 0018 #include <cctype> 0019 0020 using namespace KMime; 0021 0022 int KMime::findHeaderLineEnd(QByteArrayView src, int &dataBegin, bool *folded) 0023 { 0024 int end = dataBegin; 0025 int len = src.length() - 1; 0026 0027 if (folded) { 0028 *folded = false; 0029 } 0030 0031 if (dataBegin < 0) { 0032 // Not found 0033 return -1; 0034 } 0035 0036 if (dataBegin > len) { 0037 // No data available 0038 return len + 1; 0039 } 0040 0041 // If the first line contains nothing, but the next line starts with a space 0042 // or a tab, that means a stupid mail client has made the first header field line 0043 // entirely empty, and has folded the rest to the next line(s). 0044 if (src.at(end) == '\n' && end + 1 < len && 0045 (src[end + 1] == ' ' || src[end + 1] == '\t')) { 0046 0047 // Skip \n and first whitespace 0048 dataBegin += 2; 0049 end += 2; 0050 } 0051 0052 if (src.at(end) != '\n') { // check if the header is not empty 0053 while (true) { 0054 end = src.indexOf('\n', end + 1); 0055 if (end == -1 || end == len) { 0056 // end of string 0057 break; 0058 } else if (src[end + 1] == ' ' || src[end + 1] == '\t' || 0059 (src[end + 1] == '=' && end + 3 <= len && 0060 ((src[end + 2] == '0' && src[end + 3] == '9') || 0061 (src[end + 2] == '2' && src[end + 3] == '0')))) { 0062 // next line is header continuation or starts with =09/=20 (bug #86302) 0063 if (folded) { 0064 *folded = true; 0065 } 0066 } else { 0067 // end of header (no header continuation) 0068 break; 0069 } 0070 } 0071 } 0072 0073 if (end < 0) { 0074 end = len + 1; //take the rest of the string 0075 } 0076 return end; 0077 } 0078 0079 #if !HAVE_STRCASESTR 0080 #ifdef WIN32 0081 #define strncasecmp _strnicmp 0082 #endif 0083 static const char *strcasestr(const char *haystack, const char *needle) 0084 { 0085 /* Copied from libreplace as part of qtwebengine 5.5.1 */ 0086 const char *s; 0087 size_t nlen = strlen(needle); 0088 for (s = haystack; *s; s++) { 0089 if (toupper(*needle) == toupper(*s) && strncasecmp(s, needle, nlen) == 0) { 0090 return (char *)((uintptr_t)s); 0091 } 0092 } 0093 return NULL; 0094 } 0095 #endif 0096 0097 int KMime::indexOfHeader(const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded) 0098 { 0099 QByteArray n = name; 0100 n.append(':'); 0101 int begin = -1; 0102 0103 if (qstrnicmp(n.constData(), src.constData(), n.length()) == 0) { 0104 begin = 0; 0105 } else { 0106 n.prepend('\n'); 0107 const char *p = strcasestr(src.constData(), n.constData()); 0108 if (!p) { 0109 begin = -1; 0110 } else { 0111 begin = p - src.constData(); 0112 ++begin; 0113 } 0114 } 0115 0116 if (begin > -1) { //there is a header with the given name 0117 dataBegin = begin + name.length() + 1; //skip the name 0118 // skip the usual space after the colon 0119 if (dataBegin < src.length() && src.at(dataBegin) == ' ') { 0120 ++dataBegin; 0121 } 0122 end = findHeaderLineEnd(src, dataBegin, folded); 0123 return begin; 0124 0125 } else { 0126 end = -1; 0127 dataBegin = -1; 0128 return -1; //header not found 0129 } 0130 } 0131 0132 QByteArray KMime::extractHeader(const QByteArray &src, const QByteArray &name) 0133 { 0134 int begin; 0135 int end; 0136 bool folded; 0137 QByteArray result; 0138 0139 if (src.isEmpty() || indexOfHeader(src, name, end, begin, &folded) < 0) { 0140 return result; 0141 } 0142 0143 if (begin >= 0) { 0144 if (!folded) { 0145 result = src.mid(begin, end - begin); 0146 } else { 0147 if (end > begin) { 0148 result = unfoldHeader(src.constData() + begin, end - begin); 0149 } 0150 } 0151 } 0152 return result; 0153 } 0154 0155 QByteArray KMime::unfoldHeader(const char *header, size_t headerSize) 0156 { 0157 QByteArray result; 0158 if (headerSize == 0) { 0159 return result; 0160 } 0161 0162 // unfolding skips characters so result will be at worst headerSize long 0163 result.reserve(headerSize); 0164 0165 const char *end = header + headerSize; 0166 const char *pos = header; 0167 const char *foldBegin = nullptr; 0168 const char *foldMid = nullptr; 0169 const char *foldEnd = nullptr; 0170 while ((foldMid = strchr(pos, '\n')) && foldMid < end) { 0171 foldBegin = foldEnd = foldMid; 0172 // find the first space before the line-break 0173 while (foldBegin > header) { 0174 if (!QChar::isSpace(*(foldBegin - 1))) { 0175 break; 0176 } 0177 --foldBegin; 0178 } 0179 // find the first non-space after the line-break 0180 while (foldEnd <= end - 1) { 0181 if (QChar::isSpace(*foldEnd)) { 0182 ++foldEnd; 0183 } else if (foldEnd && *(foldEnd - 1) == '\n' && 0184 *foldEnd == '=' && foldEnd + 2 < (header + headerSize - 1) && 0185 ((*(foldEnd + 1) == '0' && 0186 *(foldEnd + 2) == '9') || 0187 (*(foldEnd + 1) == '2' && 0188 *(foldEnd + 2) == '0'))) { 0189 // bug #86302: malformed header continuation starting with =09/=20 0190 foldEnd += 3; 0191 } else { 0192 break; 0193 } 0194 } 0195 0196 result.append(pos, foldBegin - pos); 0197 if (foldBegin != pos && foldEnd < end - 1) { 0198 result += ' '; 0199 } 0200 pos = foldEnd; 0201 } 0202 if (end > pos) { 0203 result.append(pos, end - pos); 0204 } 0205 return result; 0206 } 0207 0208 QByteArray KMime::unfoldHeader(const QByteArray &header) 0209 { 0210 return unfoldHeader(header.constData(), header.size()); 0211 } 0212 0213 namespace { 0214 // state machine used by foldHeader() 0215 struct HeaderContext { 0216 unsigned int isEscapePair : 1; 0217 unsigned int isQuotedStr : 1; 0218 0219 HeaderContext() { 0220 isEscapePair = isQuotedStr = 0; 0221 } 0222 0223 void push(char c) { 0224 if (c == '\"' && !isEscapePair) { 0225 ++isQuotedStr; 0226 } else if (c == '\\' || isEscapePair) { 0227 ++isEscapePair; 0228 } 0229 } 0230 }; 0231 } 0232 0233 QByteArray KMime::foldHeader(const QByteArray &header) 0234 { 0235 // RFC 5322 section 2.1.1. "Line Length Limits" says: 0236 // 0237 // "Each line of characters MUST be no more than 998 characters, and 0238 // SHOULD be no more than 78 characters, excluding the CRLF." 0239 const int maxLen = 78; 0240 0241 if (header.length() <= maxLen) { 0242 return header; 0243 } 0244 0245 // fast forward to header body 0246 int pos = header.indexOf(':') + 1; 0247 if (pos <= 0 || pos >= header.length()) { 0248 return header; 0249 } 0250 0251 // prepare for mutating header 0252 QByteArray hdr = header; 0253 0254 // There are positions that are eligible for inserting FWS but discouraged 0255 // (e.g. existing white space within a quoted string), and there are 0256 // positions which are recommended for inserting FWS (e.g. after comma 0257 // separator of an address list). 0258 int eligible = pos; 0259 int recommended = pos; 0260 0261 // reflects start position of "current line" in byte array 0262 int start = 0; 0263 0264 HeaderContext ctx; 0265 0266 for (; true; ++pos) { 0267 if (pos - start > maxLen && eligible) { 0268 // Fold line preferably at recommended position, at eligible position 0269 // otherwise. 0270 const int fws = recommended ? recommended : eligible; 0271 hdr.insert(fws, '\n'); 0272 // We started a new line, so reset. 0273 if (eligible <= fws) { 0274 eligible = 0; 0275 } else { 0276 ++eligible; // LF 0277 } 0278 recommended = 0; 0279 start = fws + 1/* LF */; 0280 continue; 0281 } 0282 0283 if (pos >= hdr.length()) { 0284 break; 0285 } 0286 0287 // account for already inserted FWS 0288 // (NOTE: we are not caring about broken ones here) 0289 if (hdr[pos] == '\n') { 0290 recommended = eligible = 0; 0291 start = pos + 1/* LF */; 0292 } 0293 0294 // Any white space character position is eligible for folding, except of 0295 // escape pair (i.e. BSP WSP must not be folded). 0296 if (hdr[pos] == ' ' && !ctx.isEscapePair && hdr[pos - 1] != '\n') { 0297 eligible = pos; 0298 if ((hdr[pos - 1] == ',' || hdr[pos - 1] == ';') && !ctx.isQuotedStr) { 0299 recommended = pos; 0300 } 0301 } 0302 0303 ctx.push(hdr[pos]); 0304 } 0305 0306 return hdr; 0307 } 0308 0309 namespace 0310 { 0311 template < typename StringType, typename CharType > void removeQuotesGeneric(StringType &str) 0312 { 0313 bool inQuote = false; 0314 for (int i = 0; i < str.length(); ++i) { 0315 if (str[i] == CharType('"')) { 0316 str.remove(i, 1); 0317 i--; 0318 inQuote = !inQuote; 0319 } else { 0320 if (inQuote && (str[i] == CharType('\\'))) { 0321 str.remove(i, 1); 0322 } 0323 } 0324 } 0325 } 0326 } 0327 0328 void KMime::removeQuotes(QByteArray &str) 0329 { 0330 removeQuotesGeneric<QByteArray, char>(str); 0331 } 0332 0333 void KMime::removeQuotes(QString &str) 0334 { 0335 removeQuotesGeneric<QString, QLatin1Char>(str); 0336 } 0337 0338 namespace { 0339 template<class StringType, class CharConverterType> 0340 void addQuotes_impl(StringType &str, bool forceQuotes) 0341 { 0342 constexpr const char reservedCharacters[] = R"(""(),.:;<=>@[\])"; // sorted! 0343 0344 bool needsQuotes = false; 0345 for (qsizetype i = 0; i < str.length(); i++) { 0346 const auto cur = str.at(i); 0347 const auto it = std::lower_bound(std::begin(reservedCharacters), std::end(reservedCharacters), cur, [](char lhs, auto rhs) { 0348 return CharConverterType(lhs) < rhs; 0349 }); 0350 if (it != std::end(reservedCharacters) && CharConverterType(*it) == cur) { 0351 needsQuotes = true; 0352 } 0353 if (cur == CharConverterType('\\') || cur == CharConverterType('\"')) { 0354 str.insert(i, CharConverterType('\\')); 0355 i++; 0356 } 0357 } 0358 0359 if (needsQuotes || forceQuotes) { 0360 str.insert(0, CharConverterType('\"')); 0361 str.append(CharConverterType('\"')); 0362 } 0363 } 0364 } 0365 0366 void KMime::addQuotes(QByteArray &str, bool forceQuotes) 0367 { 0368 addQuotes_impl<QByteArray, char>(str, forceQuotes); 0369 } 0370 0371 void KMime::addQuotes(QString &str, bool forceQuotes) 0372 { 0373 addQuotes_impl<QString, QLatin1Char>(str, forceQuotes); 0374 } 0375 0376 QString KMime::balanceBidiState(const QString &input) 0377 { 0378 const int LRO = 0x202D; 0379 const int RLO = 0x202E; 0380 const int LRE = 0x202A; 0381 const int RLE = 0x202B; 0382 const int PDF = 0x202C; 0383 0384 QString result = input; 0385 0386 int openDirChangers = 0; 0387 int numPDFsRemoved = 0; 0388 for (int i = 0; i < input.length(); i++) { 0389 const ushort &code = input.at(i).unicode(); 0390 if (code == LRO || code == RLO || code == LRE || code == RLE) { 0391 openDirChangers++; 0392 } else if (code == PDF) { 0393 if (openDirChangers > 0) { 0394 openDirChangers--; 0395 } else { 0396 // One PDF too much, remove it 0397 qCWarning(KMIME_LOG) << "Possible Unicode spoofing (unexpected PDF) detected in" << input; 0398 result.remove(i - numPDFsRemoved, 1); 0399 numPDFsRemoved++; 0400 } 0401 } 0402 } 0403 0404 if (openDirChangers > 0) { 0405 qCWarning(KMIME_LOG) << "Possible Unicode spoofing detected in" << input; 0406 0407 // At PDF chars to the end until the correct state is restored. 0408 // As a special exception, when encountering quoted strings, place the PDF before 0409 // the last quote. 0410 for (int i = openDirChangers; i > 0; i--) { 0411 if (result.endsWith(QLatin1Char('"'))) { 0412 result.insert(result.length() - 1, QChar(PDF)); 0413 } else { 0414 result += QChar(PDF); 0415 } 0416 } 0417 } 0418 0419 return result; 0420 } 0421 0422 QString KMime::removeBidiControlChars(const QString &input) 0423 { 0424 const int LRO = 0x202D; 0425 const int RLO = 0x202E; 0426 const int LRE = 0x202A; 0427 const int RLE = 0x202B; 0428 QString result = input; 0429 result.remove(QChar(LRO)); 0430 result.remove(QChar(RLO)); 0431 result.remove(QChar(LRE)); 0432 result.remove(QChar(RLE)); 0433 return result; 0434 }