File indexing completed on 2024-12-29 04:49:58
0001 /* 0002 SPDX-FileCopyrightText: 2017-2021 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "config-kitinerary.h" 0008 #include "extractorengine.h" 0009 0010 #include "barcodedecoder.h" 0011 #include "abstractextractor.h" 0012 #include "extractordocumentnode.h" 0013 #include "extractordocumentnodefactory.h" 0014 #include "extractordocumentprocessor.h" 0015 #include "extractorresult.h" 0016 #include "extractorrepository.h" 0017 #include "extractorscriptengine_p.h" 0018 #include "jsonlddocument.h" 0019 #include "logging.h" 0020 0021 #include <KMime/Content> 0022 #include <KMime/Message> 0023 0024 #include <QDateTime> 0025 #include <QFile> 0026 #include <QFileInfo> 0027 #include <QJsonArray> 0028 #include <QJsonDocument> 0029 #include <QJsonObject> 0030 #include <QLocale> 0031 0032 #include <cstring> 0033 0034 using namespace KItinerary; 0035 0036 namespace KItinerary { 0037 0038 class ExtractorEnginePrivate { 0039 public: 0040 void processNode(ExtractorDocumentNode &node); 0041 0042 ExtractorEngine *q = nullptr; 0043 std::vector<const AbstractExtractor*> m_additionalExtractors; 0044 ExtractorDocumentNode m_rootNode; 0045 ExtractorDocumentNode m_contextNode; 0046 ExtractorDocumentNodeFactory m_nodeFactory; 0047 ExtractorRepository m_repo; 0048 BarcodeDecoder m_barcodeDecoder; 0049 ExtractorScriptEngine m_scriptEngine; 0050 ExtractorEngine::Hints m_hints = ExtractorEngine::NoHint; 0051 }; 0052 0053 } 0054 0055 void ExtractorEnginePrivate::processNode(ExtractorDocumentNode& node) 0056 { 0057 if (node.isNull()) { 0058 return; 0059 } 0060 0061 node.processor()->expandNode(node, q); 0062 for (auto c : node.childNodes()) { 0063 processNode(c); 0064 } 0065 node.processor()->reduceNode(node); 0066 0067 node.processor()->preExtract(node, q); 0068 std::vector<const AbstractExtractor*> extractors = m_additionalExtractors; 0069 m_repo.extractorsForNode(node, extractors); 0070 0071 ExtractorResult nodeResult; 0072 QString usedExtractor; 0073 for (const auto &extractor : extractors) { 0074 auto res = extractor->extract(node, q); 0075 if (!res.isEmpty()) { 0076 usedExtractor = extractor->name(); 0077 nodeResult.append(std::move(res)); 0078 } 0079 } 0080 if (!nodeResult.isEmpty()) { 0081 node.setResult(std::move(nodeResult)); 0082 node.setUsedExtractor(usedExtractor); 0083 } 0084 0085 node.processor()->postExtract(node, q); 0086 0087 // set modification time for all results that don't have it yet 0088 if (node.contextDateTime().isValid()) { 0089 auto result = node.result().jsonLdResult(); 0090 for (int i = 0; i < result.size(); ++i) { 0091 auto res = result.at(i).toObject(); 0092 if (!res.contains(QLatin1StringView("modifiedTime"))) { 0093 res.insert(QStringLiteral("modifiedTime"), 0094 node.contextDateTime().toString(Qt::ISODate)); 0095 } 0096 result[i] = res; 0097 } 0098 node.setResult(result); 0099 } 0100 } 0101 0102 0103 ExtractorEngine::ExtractorEngine() 0104 : d(new ExtractorEnginePrivate) 0105 { 0106 d->q = this; 0107 } 0108 0109 ExtractorEngine::ExtractorEngine(ExtractorEngine &&) noexcept = default; 0110 0111 ExtractorEngine::~ExtractorEngine() 0112 { 0113 // ensure we destroy nodes before we destroy the node factory 0114 clear(); 0115 } 0116 0117 void ExtractorEngine::clear() 0118 { 0119 d->m_rootNode = {}; 0120 d->m_contextNode = {}; 0121 } 0122 0123 void ExtractorEngine::setData(const QByteArray &data, QStringView fileName, QStringView mimeType) 0124 { 0125 d->m_rootNode = d->m_nodeFactory.createNode(data, fileName, mimeType); 0126 } 0127 0128 void ExtractorEngine::setContent(const QVariant &data, QStringView mimeType) 0129 { 0130 d->m_rootNode = d->m_nodeFactory.createNode(data, mimeType); 0131 } 0132 0133 void ExtractorEngine::setContext(const QVariant &data, QStringView mimeType) 0134 { 0135 d->m_contextNode = d->m_nodeFactory.createNode(data, mimeType); 0136 } 0137 0138 void ExtractorEngine::setContextDate(const QDateTime &dt) 0139 { 0140 d->m_contextNode.setContextDateTime(dt); 0141 } 0142 0143 ExtractorEngine::Hints ExtractorEngine::hints() const 0144 { 0145 return d->m_hints; 0146 } 0147 0148 void ExtractorEngine::setHints(ExtractorEngine::Hints hints) 0149 { 0150 d->m_hints = hints; 0151 } 0152 0153 QJsonArray ExtractorEngine::extract() 0154 { 0155 d->m_rootNode.setParent(d->m_contextNode); 0156 d->processNode(d->m_rootNode); 0157 return d->m_rootNode.result().jsonLdResult(); 0158 } 0159 0160 void ExtractorEngine::setUseSeparateProcess(bool separateProcess) 0161 { 0162 d->m_nodeFactory.setUseSeparateProcess(separateProcess); 0163 } 0164 0165 void ExtractorEngine::setAdditionalExtractors(std::vector<const AbstractExtractor*> &&extractors) 0166 { 0167 d->m_additionalExtractors = std::move(extractors); 0168 } 0169 0170 QString ExtractorEngine::usedCustomExtractor() const 0171 { 0172 return d->m_rootNode.usedExtractor(); 0173 } 0174 0175 const ExtractorDocumentNodeFactory* ExtractorEngine::documentNodeFactory() const 0176 { 0177 return &d->m_nodeFactory; 0178 } 0179 0180 const BarcodeDecoder* ExtractorEngine::barcodeDecoder() const 0181 { 0182 return &d->m_barcodeDecoder; 0183 } 0184 0185 const ExtractorRepository* ExtractorEngine::extractorRepository() const 0186 { 0187 return &d->m_repo; 0188 } 0189 0190 const ExtractorScriptEngine* ExtractorEngine::scriptEngine() const 0191 { 0192 d->m_scriptEngine.setExtractorEngine(const_cast<ExtractorEngine*>(this)); 0193 return &d->m_scriptEngine; 0194 } 0195 0196 ExtractorDocumentNode ExtractorEngine::rootDocumentNode() const 0197 { 0198 return d->m_rootNode; 0199 } 0200 0201 void ExtractorEngine::processNode(ExtractorDocumentNode &node) const 0202 { 0203 d->processNode(node); 0204 }