File indexing completed on 2025-01-19 04:46:50

0001 /*
0002    SPDX-FileCopyrightText: 2017 Volker Krause <vkrause@kde.org>
0003 
0004    SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "itineraryprocessor.h"
0008 #include "itinerary_debug.h"
0009 #include "itinerarymemento.h"
0010 
0011 #include <KItinerary/CreativeWork>
0012 #include <KItinerary/DocumentUtil>
0013 #include <KItinerary/Event>
0014 #include <KItinerary/ExtractorDocumentNode>
0015 #include <KItinerary/ExtractorDocumentNodeFactory>
0016 #include <KItinerary/ExtractorEngine>
0017 #include <KItinerary/JsonLdDocument>
0018 #include <KItinerary/Reservation>
0019 
0020 #include <KPkPass/Pass>
0021 
0022 #include <QJsonArray>
0023 #include <QJsonDocument>
0024 
0025 using namespace KItinerary;
0026 
0027 static bool isPkPassContent(KMime::Content *content)
0028 {
0029     const auto ct = content->contentType();
0030     const QByteArray mimetype = ct->mimeType();
0031     if (mimetype == "application/vnd.apple.pkpass") {
0032         return true;
0033     }
0034     if (mimetype != "application/octet-stream" && mimetype != "application/zip") {
0035         return false;
0036     }
0037     if (ct->name().endsWith(QLatin1StringView("pkpass"))) {
0038         return true;
0039     }
0040     const auto cd = content->contentDisposition(false);
0041     return cd && cd->filename().endsWith(QLatin1StringView("pkpass"));
0042 }
0043 
0044 static bool isCalendarContent(KMime::Content *content)
0045 {
0046     const auto ct = content->contentType();
0047     const QByteArray mimetype = ct ? ct->mimeType() : QByteArray();
0048     if (mimetype == "text/calendar" || mimetype == "application/ics") {
0049         return true;
0050     }
0051     if (mimetype != "text/plain" && mimetype != "application/octet-stream") {
0052         return false;
0053     }
0054     if (ct && ct->name().endsWith(QLatin1StringView(".ics"))) {
0055         return true;
0056     }
0057     const auto cd = content->contentDisposition(false);
0058     return cd && cd->filename().endsWith(QLatin1StringView(".ics"));
0059 }
0060 
0061 static KMime::Content *findMultipartRelatedParent(KMime::Content *node)
0062 {
0063     while (node) {
0064         if (node->contentType()->mimeType() == "multipart/related") {
0065             return node;
0066         }
0067         node = node->parent();
0068     }
0069     return nullptr;
0070 }
0071 
// Construction and destruction need no custom logic; both are defined here
// (out of line) with the compiler-generated behaviour.
ItineraryProcessor::ItineraryProcessor() = default;
ItineraryProcessor::~ItineraryProcessor() = default;
0074 
0075 MimeTreeParser::MessagePart::Ptr ItineraryProcessor::process(MimeTreeParser::Interface::BodyPart &part) const
0076 {
0077     auto nodeHelper = part.nodeHelper();
0078     if (!nodeHelper) {
0079         return {};
0080     }
0081 
0082     // determine sender date of the current part (differs from topLevel()->date() for forwarded mails
0083     bool contextIsToplevel = false;
0084     QDateTime senderDateTime;
0085     auto node = part.content();
0086     auto dateHdr = node->header<KMime::Headers::Date>();
0087     while (!dateHdr && node->parent()) {
0088         node = node->parent();
0089         dateHdr = node->header<KMime::Headers::Date>();
0090     }
0091     if (!dateHdr) { // search outside of the current MIME tree if necessary, relevant e.g. for encrypted nodes
0092         node = part.topLevelContent();
0093         if (node) {
0094             contextIsToplevel = true;
0095         }
0096         while (!dateHdr && node) {
0097             dateHdr = node->header<KMime::Headers::Date>();
0098             node = node->parent();
0099         }
0100     }
0101     if (dateHdr) {
0102         senderDateTime = dateHdr->dateTime();
0103     }
0104 
0105     auto memento = dynamic_cast<ItineraryMemento *>(nodeHelper->bodyPartMemento(part.topLevelContent(), ItineraryMemento::identifier()));
0106     if (!memento) {
0107         memento = new ItineraryMemento;
0108         memento->setMessageDate(senderDateTime);
0109         nodeHelper->setBodyPartMemento(part.topLevelContent(), ItineraryMemento::identifier(), memento);
0110     }
0111 
0112     // check if we still have to do anything at all
0113     if (memento->isParsed(part.content()->index())) {
0114         return {};
0115     }
0116     memento->setParsed(part.content()->index());
0117 
0118     std::unique_ptr<KPkPass::Pass> pass;
0119     bool isPdf = false;
0120 
0121     ExtractorEngine engine;
0122     engine.setUseSeparateProcess(true);
0123     engine.setContext(QVariant::fromValue<KMime::Content *>(contextIsToplevel ? part.topLevelContent() : part.content()), u"message/rfc822");
0124     if (isPkPassContent(part.content())) {
0125         pass.reset(KPkPass::Pass::fromData(part.content()->decodedContent()));
0126         engine.setContent(QVariant::fromValue<KPkPass::Pass *>(pass.get()), u"application/vnd.apple.pkpass");
0127     } else if (part.content()->contentType()->isHTMLText()) {
0128         engine.setContent(part.content()->decodedText(), u"text/html");
0129         // find embedded images that belong to this HTML part, and create child-nodes for those
0130         // this is needed for finding barcodes in those images
0131         if (const auto rootNode = findMultipartRelatedParent(part.content())) {
0132             const auto children = rootNode->contents();
0133             for (const auto node : children) {
0134                 const auto ct = node->contentType(false);
0135                 if (!ct || !node->contentID(false)) {
0136                     continue;
0137                 }
0138                 if (ct->mimeType() == "image/png" || ct->mimeType() == "image/gif") {
0139                     auto pngNode = engine.documentNodeFactory()->createNode(node->decodedContent(), {}, QString::fromUtf8(ct->mimeType()));
0140                     engine.rootDocumentNode().appendChild(pngNode);
0141                 }
0142             }
0143         }
0144     } else if (part.content()->contentType()->mimeType() == "application/pdf"
0145                || part.content()->contentType()->name().endsWith(QLatin1StringView(".pdf"), Qt::CaseInsensitive)) {
0146         isPdf = true;
0147         engine.setData(part.content()->decodedContent());
0148     } else if (isCalendarContent(part.content())) {
0149         engine.setData(part.content()->decodedContent());
0150     } else if (part.content()->contentType()->isPlainText()) {
0151         engine.setContent(part.content()->decodedText(), u"text/plain");
0152     } else {
0153         return {};
0154     }
0155 
0156     const auto data = engine.extract();
0157     // qCDebug(ITINERARY_LOG).noquote() << QJsonDocument(data).toJson();
0158     auto decodedData = JsonLdDocument::fromJson(data);
0159 
0160     for (auto it = decodedData.begin(); it != decodedData.end(); ++it) {
0161         if (JsonLd::isA<Event>(*it)) { // promote Event to EventReservation
0162             EventReservation res;
0163             res.setReservationFor(*it);
0164             *it = res;
0165         }
0166     }
0167 
0168     if (!decodedData.isEmpty()) {
0169         if (isPdf) {
0170             const auto docData = part.content()->decodedContent();
0171             const auto docId = DocumentUtil::idForContent(docData);
0172             DigitalDocument docInfo;
0173             docInfo.setEncodingFormat(QStringLiteral("application/pdf"));
0174             docInfo.setName(MimeTreeParser::NodeHelper::fileName(part.content()));
0175             memento->addDocument(docId, docInfo, docData);
0176 
0177             for (auto &res : decodedData) {
0178                 DocumentUtil::addDocumentId(res, docId);
0179             }
0180         }
0181 
0182         memento->appendData(decodedData);
0183     }
0184 
0185     if (pass) {
0186         memento->addPass(pass.get(), part.content()->decodedContent());
0187     }
0188 
0189     qCDebug(ITINERARY_LOG) << "-------------------------------------------- END ITINERARY PARSING";
0190     return {};
0191 }