kmime/src/kmime_util.cpp

0001 /*
0002   kmime_util.cpp
0003
0004   KMime, the KDE Internet mail/usenet news message library.
0005   SPDX-FileCopyrightText: 2001 the KMime authors.
0006   See file AUTHORS for details
0007
0008   SPDX-License-Identifier: LGPL-2.0-or-later
0009 */
0010
#include "kmime_util.h"
#include "kmime_util_p.h"

#include "kmime_charfreq_p.h"
#include "kmime_debug.h"
#include "kmime_header_parsing.h"
#include "kmime_message.h"
#include "kmime_warning_p.h"

#include <QCoreApplication>
#include <QRandomGenerator>

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <ctime>
0026
0027 using namespace KMime;
0028
0029 namespace KMime
0030 {
0031
0032 QList<QByteArray> c_harsetCache;
0033
0034 QByteArray cachedCharset(const QByteArray &name)
0035 {
0036     for (const QByteArray &charset : std::as_const(c_harsetCache)) {
0037         if (qstricmp(name.data(), charset.data()) == 0) {
0038             return charset;
0039         }
0040     }
0041
0042     c_harsetCache.append(name.toUpper());
0043     //qCDebug(KMIME_LOG) << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
0044     return c_harsetCache.last();
0045 }
0046
0047 bool isUsAscii(const QString &s)
0048 {
0049     const uint sLength = s.length();
0050     for (uint i = 0; i < sLength; i++) {
0051         if (s.at(i).toLatin1() <= 0) {     // c==0: non-latin1, c<0: non-us-ascii
0052             return false;
0053         }
0054     }
0055     return true;
0056 }
0057
0058 QString nameForEncoding(Headers::contentEncoding enc)
0059 {
0060     switch (enc) {
0061     case Headers::CE7Bit: return QStringLiteral("7bit");
0062     case Headers::CE8Bit: return QStringLiteral("8bit");
0063     case Headers::CEquPr: return QStringLiteral("quoted-printable");
0064     case Headers::CEbase64: return QStringLiteral("base64");
0065     case Headers::CEuuenc: return QStringLiteral("uuencode");
0066     case Headers::CEbinary: return QStringLiteral("binary");
0067     default: return QStringLiteral("unknown");
0068     }
0069 }
0070
0071 QList<Headers::contentEncoding> encodingsForData(const QByteArray &data) {
0072     QList<Headers::contentEncoding> allowed;
0073     CharFreq cf(data);
0074
0075     switch (cf.type()) {
0076     case CharFreq::SevenBitText:
0077         allowed << Headers::CE7Bit;
0078         [[fallthrough]];
0079     case CharFreq::EightBitText:
0080         allowed << Headers::CE8Bit;
0081         [[fallthrough]];
0082     case CharFreq::SevenBitData:
0083         if (cf.printableRatio() > 5.0 / 6.0) {
0084             // let n the length of data and p the number of printable chars.
0085             // Then base64 \approx 4n/3; qp \approx p + 3(n-p)
0086             // => qp < base64 iff p > 5n/6.
0087             allowed << Headers::CEquPr;
0088             allowed << Headers::CEbase64;
0089         } else {
0090             allowed << Headers::CEbase64;
0091             allowed << Headers::CEquPr;
0092         }
0093         break;
0094     case CharFreq::EightBitData:
0095         allowed << Headers::CEbase64;
0096         break;
0097     case CharFreq::None:
0098     default:
0099         Q_ASSERT(false);
0100     }
0101
0102     return allowed;
0103 }
0104
0105 // all except specials, CTLs, SPACE.
0106 const uchar aTextMap[16] = {
0107     0x00, 0x00, 0x00, 0x00,
0108     0x5F, 0x35, 0xFF, 0xC5,
0109     0x7F, 0xFF, 0xFF, 0xE3,
0110     0xFF, 0xFF, 0xFF, 0xFE
0111 };
0112
0113 // all except tspecials, CTLs, SPACE.
0114 const uchar tTextMap[16] = {
0115     0x00, 0x00, 0x00, 0x00,
0116     0x5F, 0x36, 0xFF, 0xC0,
0117     0x7F, 0xFF, 0xFF, 0xE3,
0118     0xFF, 0xFF, 0xFF, 0xFE
0119 };
0120
0121 QByteArray uniqueString()
0122 {
0123     static const char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
0124     time_t now;
0125     char p[11];
0126     int ran;
0127     unsigned int timeval;
0128
0129     p[10] = '\0';
0130     now = time(nullptr);
0131     ran = 1 + (int)(1000.0 * rand() / (RAND_MAX + 1.0));
0132     timeval = (now / ran) + QCoreApplication::applicationPid();
0133
0134     for (int i = 0; i < 10; i++) {
0135         int pos = (int)(61.0 * rand() / (RAND_MAX + 1.0));
0136         //qCDebug(KMIME_LOG) << pos;
0137         p[i] = chars[pos];
0138     }
0139
0140     QByteArray ret;
0141     ret.setNum(timeval);
0142     ret += '.';
0143     ret += p;
0144
0145     return ret;
0146 }
0147
0148 QByteArray multiPartBoundary()
0149 {
0150     return "nextPart" + uniqueString();
0151 }
0152
0153 QByteArray CRLFtoLF(const QByteArray &s)
0154 {
0155     if (!s.contains("\r\n")) {
0156         return s;
0157     }
0158
0159     QByteArray ret = s;
0160     ret.replace("\r\n", "\n");
0161     return ret;
0162 }
0163
0164 QByteArray CRLFtoLF(const char *s)
0165 {
0166     QByteArray ret = s;
0167     return CRLFtoLF(ret);
0168 }
0169
0170 QByteArray LFtoCRLF(const QByteArray &s)
0171 {
0172     const int firstNewline = s.indexOf('\n');
0173     if (firstNewline == -1) {
0174         return s;
0175     }
0176     if (firstNewline > 0 && s.at(firstNewline - 1) == '\r') {
0177         // We found \r\n already, don't change anything
0178         // This check assumes that input is consistent in terms of newlines,
0179         // but so did if (s.contains("\r\n")), too.
0180         return s;
0181     }
0182
0183     QByteArray ret = s;
0184     ret.replace('\n', "\r\n");
0185     return ret;
0186 }
0187
0188 QByteArray LFtoCRLF(const char *s)
0189 {
0190     QByteArray ret = s;
0191     return LFtoCRLF(ret);
0192 }
0193
0194 bool isCryptoPart(Content *content)
0195 {
0196     auto ct = content->contentType(false);
0197     if (!ct || !ct->isMediatype("application")) {
0198         return false;
0199     }
0200
0201     const QByteArray lowerSubType = ct->subType().toLower();
0202     if (lowerSubType == "pgp-encrypted" ||
0203         lowerSubType == "pgp-signature" ||
0204         lowerSubType == "pkcs7-mime" ||
0205         lowerSubType == "x-pkcs7-mime" ||
0206         lowerSubType == "pkcs7-signature" ||
0207         lowerSubType == "x-pkcs7-signature") {
0208         return true;
0209     }
0210
0211     if (lowerSubType == "octet-stream") {
0212         auto cd = content->contentDisposition(false);
0213         if (!cd) {
0214             return false;
0215         }
0216         const auto fileName = cd->filename().toLower();
0217         return fileName == QLatin1StringView("msg.asc") ||
0218                fileName == QLatin1StringView("encrypted.asc");
0219     }
0220
0221     return false;
0222 }
0223
0224 bool isAttachment(Content* content)
0225 {
0226     if (!content) {
0227         return false;
0228     }
0229
0230     const auto contentType = content->contentType(false);
0231     // multipart/* is never an attachment itself, message/rfc822 always is
0232     if (contentType) {
0233         if (contentType->isMultipart()) {
0234             return false;
0235         }
0236         if (contentType->isMimeType("message/rfc822")) {
0237             return true;
0238         }
0239     }
0240
0241     // the main body part is not an attachment
0242     if (content->parent()) {
0243         const auto top = content->topLevel();
0244         if (content == top->textContent()) {
0245             return false;
0246         }
0247     }
0248
0249     // ignore crypto parts
0250     if (isCryptoPart(content)) {
0251         return false;
0252     }
0253
0254     // content type or content disposition having a file name set looks like an attachment
0255     const auto contentDisposition = content->contentDisposition(false);
0256     if (contentDisposition && !contentDisposition->filename().isEmpty()) {
0257         return true;
0258     }
0259
0260     if (contentType && !contentType->name().isEmpty()) {
0261         return true;
0262     }
0263
0264     // "attachment" content disposition is otherwise a good indicator though
0265     if (contentDisposition && contentDisposition->disposition() == Headers::CDattachment) {
0266         return true;
0267     }
0268
0269     return false;
0270 }
0271
0272 bool hasAttachment(Content *content)
0273 {
0274     if (!content) {
0275         return false;
0276     }
0277
0278     if (isAttachment(content)) {
0279         return true;
0280     }
0281
0282     // Ok, content itself is not an attachment. now we deal with multiparts
0283     auto ct = content->contentType(false);
0284     if (ct && ct->isMultipart() && !ct->isSubtype("related")) {// && !ct->isSubtype("alternative")) {
0285         const auto contents = content->contents();
0286         for (Content *child : contents) {
0287             if (hasAttachment(child)) {
0288                 return true;
0289             }
0290         }
0291     }
0292     return false;
0293 }
0294
0295 bool hasInvitation(Content *content)
0296 {
0297     if (!content) {
0298         return false;
0299     }
0300
0301     if (isInvitation(content)) {
0302         return true;
0303     }
0304
0305     // Ok, content itself is not an invitation. now we deal with multiparts
0306     if (content->contentType()->isMultipart()) {
0307         const auto contents = content->contents();
0308         for (Content *child : contents) {
0309             if (hasInvitation(child)) {
0310                 return true;
0311             }
0312         }
0313     }
0314     return false;
0315 }
0316
0317 bool isSigned(Message *message)
0318 {
0319     if (!message) {
0320         return false;
0321     }
0322
0323     const KMime::Headers::ContentType *const contentType = message->contentType();
0324     if (contentType->isSubtype("signed") ||
0325             contentType->isSubtype("pgp-signature") ||
0326             contentType->isSubtype("pkcs7-signature") ||
0327             contentType->isSubtype("x-pkcs7-signature") ||
0328             message->mainBodyPart("multipart/signed") ||
0329             message->mainBodyPart("application/pgp-signature") ||
0330             message->mainBodyPart("application/pkcs7-signature") ||
0331             message->mainBodyPart("application/x-pkcs7-signature")) {
0332         return true;
0333     }
0334     return false;
0335 }
0336
0337 bool isEncrypted(Message *message)
0338 {
0339     if (!message) {
0340         return false;
0341     }
0342
0343     const KMime::Headers::ContentType *const contentType = message->contentType();
0344     if (contentType->isSubtype("encrypted") ||
0345             contentType->isSubtype("pgp-encrypted") ||
0346             contentType->isSubtype("pkcs7-mime") ||
0347             contentType->isSubtype("x-pkcs7-mime") ||
0348             message->mainBodyPart("multipart/encrypted") ||
0349             message->mainBodyPart("application/pgp-encrypted") ||
0350             message->mainBodyPart("application/pkcs7-mime") ||
0351             message->mainBodyPart("application/x-pkcs7-mime")) {
0352         return true;
0353     }
0354
0355     return false;
0356 }
0357
0358 bool isInvitation(Content *content)
0359 {
0360     if (!content) {
0361         return false;
0362     }
0363
0364     const KMime::Headers::ContentType *const contentType = content->contentType(false);
0365
0366     if (contentType && contentType->isMediatype("text") && contentType->isSubtype("calendar")) {
0367         return true;
0368     }
0369
0370     return false;
0371 }
0372
0373 } // namespace KMime