File indexing completed on 2024-12-29 04:49:58

0001 /*
0002    SPDX-FileCopyrightText: 2017-2021 Volker Krause <vkrause@kde.org>
0003 
0004    SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "config-kitinerary.h"
0008 #include "extractorengine.h"
0009 
0010 #include "barcodedecoder.h"
0011 #include "abstractextractor.h"
0012 #include "extractordocumentnode.h"
0013 #include "extractordocumentnodefactory.h"
0014 #include "extractordocumentprocessor.h"
0015 #include "extractorresult.h"
0016 #include "extractorrepository.h"
0017 #include "extractorscriptengine_p.h"
0018 #include "jsonlddocument.h"
0019 #include "logging.h"
0020 
0021 #include <KMime/Content>
0022 #include <KMime/Message>
0023 
0024 #include <QDateTime>
0025 #include <QFile>
0026 #include <QFileInfo>
0027 #include <QJsonArray>
0028 #include <QJsonDocument>
0029 #include <QJsonObject>
0030 #include <QLocale>
0031 
0032 #include <cstring>
0033 
0034 using namespace KItinerary;
0035 
0036 namespace KItinerary {
0037 
     // Private implementation data of ExtractorEngine.
     //
     // NOTE: members are destroyed in reverse declaration order, which means
     // m_nodeFactory goes away before m_rootNode/m_contextNode. The
     // ExtractorEngine destructor therefore resets both nodes explicitly first.
     // Do not reorder the members without revisiting that.
0038 class ExtractorEnginePrivate {
0039 public:
     // Runs the expand/reduce/extract pipeline on node (and, recursively, on its children).
0040     void processNode(ExtractorDocumentNode &node);
0041 
     // Back-pointer to the owning public object; passed on to processors and extractors.
0042     ExtractorEngine *q = nullptr;
     // Extractors that are tried in addition to the ones selected by m_repo.
0043     std::vector<const AbstractExtractor*> m_additionalExtractors;
     // Root of the document tree that is being extracted.
0044     ExtractorDocumentNode m_rootNode;
     // Context node; becomes the parent of m_rootNode when extraction starts.
0045     ExtractorDocumentNode m_contextNode;
     // Creates document nodes from raw data or decoded content.
0046     ExtractorDocumentNodeFactory m_nodeFactory;
     // Looks up the extractors applicable to a given node.
0047     ExtractorRepository m_repo;
0048     BarcodeDecoder m_barcodeDecoder;
0049     ExtractorScriptEngine m_scriptEngine;
0050     ExtractorEngine::Hints m_hints = ExtractorEngine::NoHint;
0051 };
0052 
0053 }
0054 
0055 void ExtractorEnginePrivate::processNode(ExtractorDocumentNode& node)
0056 {
0057     if (node.isNull()) {
0058         return;
0059     }
0060 
0061     node.processor()->expandNode(node, q);
0062     for (auto c : node.childNodes()) {
0063         processNode(c);
0064     }
0065     node.processor()->reduceNode(node);
0066 
0067     node.processor()->preExtract(node, q);
0068     std::vector<const AbstractExtractor*> extractors = m_additionalExtractors;
0069     m_repo.extractorsForNode(node, extractors);
0070 
0071     ExtractorResult nodeResult;
0072     QString usedExtractor;
0073     for (const auto &extractor : extractors) {
0074         auto res = extractor->extract(node, q);
0075         if (!res.isEmpty()) {
0076             usedExtractor = extractor->name();
0077             nodeResult.append(std::move(res));
0078         }
0079     }
0080     if (!nodeResult.isEmpty()) {
0081         node.setResult(std::move(nodeResult));
0082         node.setUsedExtractor(usedExtractor);
0083     }
0084 
0085     node.processor()->postExtract(node, q);
0086 
0087     // set modification time for all results that don't have it yet
0088     if (node.contextDateTime().isValid()) {
0089         auto result = node.result().jsonLdResult();
0090         for (int i = 0; i < result.size(); ++i) {
0091             auto res = result.at(i).toObject();
0092             if (!res.contains(QLatin1StringView("modifiedTime"))) {
0093               res.insert(QStringLiteral("modifiedTime"),
0094                          node.contextDateTime().toString(Qt::ISODate));
0095             }
0096             result[i] = res;
0097         }
0098         node.setResult(result);
0099     }
0100 }
0101 
0102 
0103 ExtractorEngine::ExtractorEngine()
0104     : d(new ExtractorEnginePrivate)
0105 {
0106     d->q = this;
0107 }
0108 
0109 ExtractorEngine::ExtractorEngine(ExtractorEngine &&) noexcept = default;
0110 
0111 ExtractorEngine::~ExtractorEngine()
0112 {
0113     // ensure we destroy nodes before we destroy the node factory
0114     clear();
0115 }
0116 
0117 void ExtractorEngine::clear()
0118 {
0119     d->m_rootNode = {};
0120     d->m_contextNode = {};
0121 }
0122 
0123 void ExtractorEngine::setData(const QByteArray &data, QStringView fileName, QStringView mimeType)
0124 {
0125     d->m_rootNode = d->m_nodeFactory.createNode(data, fileName, mimeType);
0126 }
0127 
0128 void ExtractorEngine::setContent(const QVariant &data, QStringView mimeType)
0129 {
0130     d->m_rootNode = d->m_nodeFactory.createNode(data, mimeType);
0131 }
0132 
0133 void ExtractorEngine::setContext(const QVariant &data, QStringView mimeType)
0134 {
0135     d->m_contextNode = d->m_nodeFactory.createNode(data, mimeType);
0136 }
0137 
0138 void ExtractorEngine::setContextDate(const QDateTime &dt)
0139 {
0140     d->m_contextNode.setContextDateTime(dt);
0141 }
0142 
0143 ExtractorEngine::Hints ExtractorEngine::hints() const
0144 {
0145     return d->m_hints;
0146 }
0147 
0148 void ExtractorEngine::setHints(ExtractorEngine::Hints hints)
0149 {
0150     d->m_hints = hints;
0151 }
0152 
0153 QJsonArray ExtractorEngine::extract()
0154 {
0155     d->m_rootNode.setParent(d->m_contextNode);
0156     d->processNode(d->m_rootNode);
0157     return d->m_rootNode.result().jsonLdResult();
0158 }
0159 
0160 void ExtractorEngine::setUseSeparateProcess(bool separateProcess)
0161 {
0162     d->m_nodeFactory.setUseSeparateProcess(separateProcess);
0163 }
0164 
0165 void ExtractorEngine::setAdditionalExtractors(std::vector<const AbstractExtractor*> &&extractors)
0166 {
0167     d->m_additionalExtractors = std::move(extractors);
0168 }
0169 
0170 QString ExtractorEngine::usedCustomExtractor() const
0171 {
0172     return d->m_rootNode.usedExtractor();
0173 }
0174 
0175 const ExtractorDocumentNodeFactory* ExtractorEngine::documentNodeFactory() const
0176 {
0177     return &d->m_nodeFactory;
0178 }
0179 
0180 const BarcodeDecoder* ExtractorEngine::barcodeDecoder() const
0181 {
0182     return &d->m_barcodeDecoder;
0183 }
0184 
0185 const ExtractorRepository* ExtractorEngine::extractorRepository() const
0186 {
0187     return &d->m_repo;
0188 }
0189 
0190 const ExtractorScriptEngine* ExtractorEngine::scriptEngine() const
0191 {
0192     d->m_scriptEngine.setExtractorEngine(const_cast<ExtractorEngine*>(this));
0193     return &d->m_scriptEngine;
0194 }
0195 
0196 ExtractorDocumentNode ExtractorEngine::rootDocumentNode() const
0197 {
0198     return d->m_rootNode;
0199 }
0200 
0201 void ExtractorEngine::processNode(ExtractorDocumentNode &node) const
0202 {
0203     d->processNode(node);
0204 }