File indexing completed on 2024-05-12 15:37:07

0001 /*
0002     SPDX-FileCopyrightText: 2018 Stefan Brüns <stefan.bruens@rwth-aachen.de>
0003 
0004     SPDX-License-Identifier: LGPL-2.1-or-later
0005 */
0006 
0007 
0008 #include "xmlextractor.h"
0009 #include "kfilemetadata_debug.h"
0010 #include "dublincoreextractor.h"
0011 
0012 #include <QDomDocument>
0013 #include <QFile>
0014 #include <QXmlStreamReader>
0015 
0016 namespace {
0017 
0018 //inline QString dcElementNS()     { return QStringLiteral("http://purl.org/dc/elements/1.1/"); }
0019 inline QString svgNS()    { return QStringLiteral("http://www.w3.org/2000/svg"); }
0020 inline QString rdfNS()    { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); }
0021 inline QString ccNS()     { return QStringLiteral("http://creativecommons.org/ns#"); }
0022 
0023 void extractSvgText(KFileMetaData::ExtractionResult* result, const QDomElement &node)
0024 {
0025     if (node.namespaceURI() != svgNS()) {
0026         return;
0027     }
0028 
0029     if ((node.localName() == QLatin1String("g")) ||
0030         (node.localName() == QLatin1String("a"))) {
0031         QDomElement e = node.firstChildElement();
0032         for (; !e.isNull(); e = e.nextSiblingElement()) {
0033             extractSvgText(result, e);
0034         }
0035     } else if (node.localName() == QLatin1String("text")) {
0036         qCDebug(KFILEMETADATA_LOG) << node.text();
0037         result->append(node.text());
0038     }
0039 }
0040 
0041 static const QStringList supportedMimeTypes = {
0042     QStringLiteral("application/xml"),
0043     QStringLiteral("image/svg+xml"),
0044     QStringLiteral("image/svg"),
0045 };
0046 
0047 }
0048 
0049 namespace KFileMetaData
0050 {
0051 
0052 XmlExtractor::XmlExtractor(QObject* parent)
0053     : ExtractorPlugin(parent)
0054 {
0055 
0056 }
0057 
0058 QStringList XmlExtractor::mimetypes() const
0059 {
0060     return supportedMimeTypes;
0061 }
0062 
0063 void XmlExtractor::extract(ExtractionResult* result)
0064 {
0065     auto flags = result->inputFlags();
0066     QFile file(result->inputUrl());
0067     if (!file.open(QIODevice::ReadOnly)) {
0068         qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file";
0069         return;
0070     }
0071 
0072     if ((result->inputMimetype() == QLatin1String("image/svg")) ||
0073         (result->inputMimetype() == QLatin1String("image/svg+xml"))) {
0074         result->addType(Type::Image);
0075 
0076         QDomDocument doc;
0077         const bool processNamespaces = true;
0078         doc.setContent(&file, processNamespaces);
0079         QDomElement svg = doc.firstChildElement();
0080 
0081         if (!svg.isNull()
0082             && svg.localName() == QLatin1String("svg")
0083             && svg.namespaceURI() == svgNS()) {
0084 
0085             QDomElement e = svg.firstChildElement();
0086             for (; !e.isNull(); e = e.nextSiblingElement()) {
0087                 if (e.namespaceURI() != svgNS()) {
0088                     continue;
0089                 }
0090 
0091                 if (e.localName() == QLatin1String("metadata")) {
0092                     if (!(flags & ExtractionResult::ExtractMetaData)) {
0093                         continue;
0094                     }
0095 
0096                     auto rdf = e.firstChildElement(QLatin1String("RDF"));
0097                     if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) {
0098                         continue;
0099                     }
0100 
0101                     auto cc = rdf.firstChildElement(QLatin1String("Work"));
0102                     if (cc.isNull() || cc.namespaceURI() != ccNS()) {
0103                         continue;
0104                     }
0105 
0106                     DublinCoreExtractor::extract(result, cc);
0107 
0108                 } else if (e.localName() == QLatin1String("defs")) {
0109                     // skip
0110                     continue;
0111                 } else if (flags & ExtractionResult::ExtractPlainText) {
0112                     // extract
0113                     extractSvgText(result, e);
0114                 }
0115             }
0116         }
0117     } else {
0118         result->addType(Type::Text);
0119 
0120         if (flags & ExtractionResult::ExtractPlainText) {
0121             QXmlStreamReader stream(&file);
0122             while (!stream.atEnd()) {
0123                 QXmlStreamReader::TokenType token = stream.readNext();
0124 
0125                 if (token == QXmlStreamReader::Characters) {
0126                     QString text = stream.text().trimmed().toString();
0127                     if (!text.isEmpty()) {
0128                         result->append(text);
0129                     }
0130                 }
0131             }
0132         }
0133     }
0134 }
0135 
0136 } // namespace KFileMetaData
0137 
0138 #include "moc_xmlextractor.cpp"