File indexing completed on 2025-03-16 12:49:37
0001 /* 0002 SPDX-FileCopyrightText: 2018 Stefan BrĂ¼ns <stefan.bruens@rwth-aachen.de> 0003 0004 SPDX-License-Identifier: LGPL-2.1-or-later 0005 */ 0006 0007 0008 #include "xmlextractor.h" 0009 #include "kfilemetadata_debug.h" 0010 #include "dublincoreextractor.h" 0011 0012 #include <QDomDocument> 0013 #include <QFile> 0014 #include <QXmlStreamReader> 0015 0016 namespace { 0017 0018 //inline QString dcElementNS() { return QStringLiteral("http://purl.org/dc/elements/1.1/"); } 0019 inline QString svgNS() { return QStringLiteral("http://www.w3.org/2000/svg"); } 0020 inline QString rdfNS() { return QStringLiteral("http://www.w3.org/1999/02/22-rdf-syntax-ns#"); } 0021 inline QString ccNS() { return QStringLiteral("http://creativecommons.org/ns#"); } 0022 0023 void extractSvgText(KFileMetaData::ExtractionResult* result, const QDomElement &node) 0024 { 0025 if (node.namespaceURI() != svgNS()) { 0026 return; 0027 } 0028 0029 if ((node.localName() == QLatin1String("g")) || 0030 (node.localName() == QLatin1String("a"))) { 0031 QDomElement e = node.firstChildElement(); 0032 for (; !e.isNull(); e = e.nextSiblingElement()) { 0033 extractSvgText(result, e); 0034 } 0035 } else if (node.localName() == QLatin1String("text")) { 0036 qCDebug(KFILEMETADATA_LOG) << node.text(); 0037 result->append(node.text()); 0038 } 0039 } 0040 0041 static const QStringList supportedMimeTypes = { 0042 QStringLiteral("application/xml"), 0043 QStringLiteral("image/svg+xml"), 0044 QStringLiteral("image/svg"), 0045 }; 0046 0047 } 0048 0049 namespace KFileMetaData 0050 { 0051 0052 XmlExtractor::XmlExtractor(QObject* parent) 0053 : ExtractorPlugin(parent) 0054 { 0055 0056 } 0057 0058 QStringList XmlExtractor::mimetypes() const 0059 { 0060 return supportedMimeTypes; 0061 } 0062 0063 void XmlExtractor::extract(ExtractionResult* result) 0064 { 0065 auto flags = result->inputFlags(); 0066 QFile file(result->inputUrl()); 0067 if (!file.open(QIODevice::ReadOnly)) { 0068 qCWarning(KFILEMETADATA_LOG) << "Document is not a valid file"; 0069 return; 0070 } 0071 0072 if ((result->inputMimetype() == QLatin1String("image/svg")) || 0073 (result->inputMimetype() == QLatin1String("image/svg+xml"))) { 0074 result->addType(Type::Image); 0075 0076 QDomDocument doc; 0077 const bool processNamespaces = true; 0078 doc.setContent(&file, processNamespaces); 0079 QDomElement svg = doc.firstChildElement(); 0080 0081 if (!svg.isNull() 0082 && svg.localName() == QLatin1String("svg") 0083 && svg.namespaceURI() == svgNS()) { 0084 0085 QDomElement e = svg.firstChildElement(); 0086 for (; !e.isNull(); e = e.nextSiblingElement()) { 0087 if (e.namespaceURI() != svgNS()) { 0088 continue; 0089 } 0090 0091 if (e.localName() == QLatin1String("metadata")) { 0092 if (!(flags & ExtractionResult::ExtractMetaData)) { 0093 continue; 0094 } 0095 0096 auto rdf = e.firstChildElement(QLatin1String("RDF")); 0097 if (rdf.isNull() || rdf.namespaceURI() != rdfNS()) { 0098 continue; 0099 } 0100 0101 auto cc = rdf.firstChildElement(QLatin1String("Work")); 0102 if (cc.isNull() || cc.namespaceURI() != ccNS()) { 0103 continue; 0104 } 0105 0106 DublinCoreExtractor::extract(result, cc); 0107 0108 } else if (e.localName() == QLatin1String("defs")) { 0109 // skip 0110 continue; 0111 } else if (flags & ExtractionResult::ExtractPlainText) { 0112 // extract 0113 extractSvgText(result, e); 0114 } 0115 } 0116 } 0117 } else { 0118 result->addType(Type::Text); 0119 0120 if (flags & ExtractionResult::ExtractPlainText) { 0121 QXmlStreamReader stream(&file); 0122 while (!stream.atEnd()) { 0123 QXmlStreamReader::TokenType token = stream.readNext(); 0124 0125 if (token == QXmlStreamReader::Characters) { 0126 QString text = stream.text().trimmed().toString(); 0127 if (!text.isEmpty()) { 0128 result->append(text); 0129 } 0130 } 0131 } 0132 } 0133 } 0134 } 0135 0136 } // namespace KFileMetaData 0137 0138 #include "moc_xmlextractor.cpp"