File indexing completed on 2024-04-14 03:58:25

0001 /*
0002     This file is part of the syndication library
0003     SPDX-FileCopyrightText: 2006 Frank Osterfeld <osterfeld@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #include "parser.h"
0009 #include "constants.h"
0010 #include "content.h"
0011 #include "document.h"
0012 
0013 #include <documentsource.h>
0014 
0015 #include <QDomAttr>
0016 #include <QDomDocument>
0017 #include <QDomElement>
0018 #include <QDomNamedNodeMap>
0019 #include <QDomNode>
0020 #include <QDomNodeList>
0021 
0022 #include <QHash>
0023 #include <QString>
0024 
0025 namespace Syndication
0026 {
0027 namespace Atom
0028 {
0029 class SYNDICATION_NO_EXPORT Parser::ParserPrivate
0030 {
0031 public:
0032     static QDomDocument convertAtom0_3(const QDomDocument &document);
0033     static QDomNode convertNode(QDomDocument &doc, const QDomNode &node, const QHash<QString, QString> &nameMapper);
0034 };
0035 
0036 bool Parser::accept(const Syndication::DocumentSource &source) const
0037 {
0038     QDomElement root = source.asDomDocument().documentElement();
0039     return !root.isNull() && (root.namespaceURI() == atom1Namespace() || root.namespaceURI() == atom0_3Namespace());
0040 }
0041 
0042 Syndication::SpecificDocumentPtr Parser::parse(const Syndication::DocumentSource &source) const
0043 {
0044     QDomDocument doc = source.asDomDocument();
0045 
0046     if (doc.isNull()) {
0047         // if this is not atom, return an invalid feed document
0048         return FeedDocumentPtr(new FeedDocument());
0049     }
0050 
0051     QDomElement feed = doc.namedItem(QStringLiteral("feed")).toElement();
0052 
0053     bool feedValid = !feed.isNull();
0054 
0055     if (feedValid //
0056         && feed.attribute(QStringLiteral("version")) == QLatin1String("0.3")) {
0057         doc = ParserPrivate::convertAtom0_3(doc);
0058         feed = doc.namedItem(QStringLiteral("feed")).toElement();
0059     }
0060 
0061     feedValid = !feed.isNull() && feed.namespaceURI() == atom1Namespace();
0062 
0063     if (feedValid) {
0064         return FeedDocumentPtr(new FeedDocument(feed));
0065     }
0066 
0067     QDomElement entry = doc.namedItem(QStringLiteral("entry")).toElement();
0068     bool entryValid = !entry.isNull() && entry.namespaceURI() == atom1Namespace();
0069 
0070     if (entryValid) {
0071         return EntryDocumentPtr(new EntryDocument(feed));
0072     }
0073 
0074     // if this is not atom, return an invalid feed document
0075     return FeedDocumentPtr(new FeedDocument());
0076 }
0077 
0078 QString Parser::format() const
0079 {
0080     return QStringLiteral("atom");
0081 }
0082 
0083 QDomNode Parser::ParserPrivate::convertNode(QDomDocument &doc, const QDomNode &node, const QHash<QString, QString> &nameMapper)
0084 {
0085     if (!node.isElement()) {
0086         return node.cloneNode(true);
0087     }
0088 
0089     bool isAtom03Element = node.namespaceURI() == atom0_3Namespace();
0090     QDomElement oldEl = node.toElement();
0091 
0092     // use new namespace
0093     QString newNS = isAtom03Element ? atom1Namespace() : node.namespaceURI();
0094 
0095     QString newName = node.localName();
0096 
0097     // rename tags that are listed in the nameMapper
0098     if (isAtom03Element && nameMapper.contains(node.localName())) {
0099         newName = nameMapper[node.localName()];
0100     }
0101 
0102     QDomElement newEl = doc.createElementNS(newNS, newName);
0103 
0104     QDomNamedNodeMap attributes = oldEl.attributes();
0105 
0106     // copy over attributes
0107     const int numberOfAttributes(attributes.count());
0108     for (int i = 0; i < numberOfAttributes; ++i) {
0109         const QDomAttr attr = attributes.item(i).toAttr();
0110         if (attr.namespaceURI().isEmpty()) {
0111             newEl.setAttribute(attr.name(), attr.value());
0112         } else {
0113             newEl.setAttributeNS(attr.namespaceURI(), attr.name(), attr.value());
0114         }
0115     }
0116 
0117     /* clang-format off */
0118     bool isTextConstruct = newNS == atom1Namespace()
0119                            && (newName == QLatin1String("title")
0120                                || newName == QLatin1String("rights")
0121                                || newName == QLatin1String("subtitle")
0122                                || newName == QLatin1String("summary"));
0123     /* clang-format on */
0124 
0125     // for atom text constructs, map to new type schema (which only allows text, type, xhtml)
0126 
0127     if (isTextConstruct) {
0128         QString oldType = newEl.attribute(QStringLiteral("type"), QStringLiteral("text/plain"));
0129         QString newType;
0130 
0131         Content::Format format = Content::mapTypeToFormat(oldType);
0132         switch (format) {
0133         case Content::XML:
0134             newType = QStringLiteral("xhtml");
0135             break;
0136         case Content::EscapedHTML:
0137             newType = QStringLiteral("html");
0138             break;
0139         case Content::PlainText:
0140         case Content::Binary:
0141         default:
0142             newType = QStringLiteral("text");
0143         }
0144 
0145         newEl.setAttribute(QStringLiteral("type"), newType);
0146     } else {
0147         // for generator, rename the "url" attribute to "uri"
0148 
0149         bool isGenerator = newNS == atom1Namespace() && newName == QLatin1String("generator");
0150         if (isGenerator && newEl.hasAttribute(QStringLiteral("url"))) {
0151             newEl.setAttribute(QStringLiteral("uri"), newEl.attribute(QStringLiteral("url")));
0152         }
0153     }
0154 
0155     // process child nodes recursively and append them
0156     QDomNodeList children = node.childNodes();
0157     for (int i = 0; i < children.count(); ++i) {
0158         newEl.appendChild(convertNode(doc, children.item(i), nameMapper));
0159     }
0160 
0161     return newEl;
0162 }
0163 
0164 QDomDocument Parser::ParserPrivate::convertAtom0_3(const QDomDocument &doc03)
0165 {
0166     QDomDocument doc = doc03.cloneNode(false).toDocument();
0167 
0168     // these are the tags that were renamed in 1.0
0169     QHash<QString, QString> nameMapper;
0170     nameMapper.insert(QStringLiteral("issued"), QStringLiteral("published"));
0171     nameMapper.insert(QStringLiteral("modified"), QStringLiteral("updated"));
0172     nameMapper.insert(QStringLiteral("url"), QStringLiteral("uri"));
0173     nameMapper.insert(QStringLiteral("copyright"), QStringLiteral("rights"));
0174     nameMapper.insert(QStringLiteral("tagline"), QStringLiteral("subtitle"));
0175 
0176     const QDomNodeList children = doc03.childNodes();
0177 
0178     for (int i = 0; i < children.count(); ++i) {
0179         doc.appendChild(convertNode(doc, children.item(i), nameMapper));
0180     }
0181 
0182     return doc;
0183 }
0184 
0185 Parser::Parser()
0186     : d(nullptr)
0187 {
0188     Q_UNUSED(d) // silence -Wunused-private-field
0189 }
0190 
0191 Parser::~Parser() = default;
0192 
0193 Parser::Parser(const Parser &other)
0194     : AbstractParser(other)
0195     , d(nullptr)
0196 {
0197 }
0198 Parser &Parser::operator=(const Parser & /*other*/)
0199 {
0200     return *this;
0201 }
0202 
0203 } // namespace Atom
0204 } // namespace Syndication