File indexing completed on 2024-12-01 12:41:52

0001 /*
0002     This file is part of the syndication library
0003     SPDX-FileCopyrightText: 2005 Frank Osterfeld <osterfeld@kde.org>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
#include <constants.h>
#include <rss2/category.h>
#include <rss2/enclosure.h>
#include <rss2/item.h>
#include <rss2/source.h>
#include <rss2/tools_p.h>
#include <specificitem.h>
#include <specificitemvisitor.h>
#include <tools.h>

#include <QDomElement>
#include <QList>
#include <QString>

#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>
0023 
0024 namespace Syndication
0025 {
0026 namespace RSS2
0027 {
// Private data of Item (d-pointer): currently only the owning document.
class SYNDICATION_NO_EXPORT Item::ItemPrivate
{
public:
    // Document handed to the Item constructors. Item::title() and
    // Item::description() ask it for format information, Item::link() for the
    // base link. It may be null; title() and description() check for that.
    QSharedPointer<Document> doc;
};
0033 
0034 Item::Item(QSharedPointer<Document> doc)
0035     : ElementWrapper()
0036     , d(new ItemPrivate)
0037 {
0038     d->doc = doc;
0039 }
0040 
0041 Item::Item(const QDomElement &element, QSharedPointer<Document> doc)
0042     : ElementWrapper(element)
0043     , d(new ItemPrivate)
0044 {
0045     d->doc = doc;
0046 }
0047 
0048 Item::~Item()
0049 {
0050 }
0051 
0052 Item::Item(const Item &other)
0053     : ElementWrapper(other)
0054     , SpecificItem(other)
0055 {
0056     d = other.d;
0057 }
0058 
0059 Item &Item::operator=(const Item &other)
0060 {
0061     ElementWrapper::operator=(other);
0062     SpecificItem::operator=(other);
0063     d = other.d;
0064     return *this;
0065 }
0066 
0067 QString Item::title() const
0068 {
0069     if (!d->doc) {
0070         return originalTitle();
0071     }
0072 
0073     bool isCDATA = false;
0074     bool containsMarkup = false;
0075     d->doc->getItemTitleFormatInfo(&isCDATA, &containsMarkup);
0076 
0077     return normalize(originalTitle(), isCDATA, containsMarkup);
0078 }
0079 
0080 QString Item::originalDescription() const
0081 {
0082     return extractElementTextNS(QString(), QStringLiteral("description"));
0083 }
0084 
0085 QString Item::originalTitle() const
0086 {
0087     return extractElementTextNS(QString(), QStringLiteral("title"));
0088 }
0089 
0090 QString Item::link() const
0091 {
0092     QString url = extractElementTextNS(QString(), QStringLiteral("link"));
0093     if (url.startsWith(QLatin1String("http://")) || url.startsWith(QLatin1String("https://"))) {
0094         return url;
0095     }
0096     if (url.isEmpty()) {
0097         return QString();
0098     }
0099     if (d->doc->link().isEmpty()) {
0100         return url;
0101     }
0102     // link does not look like a complete url, assume the feed author expects
0103     // the doc link to provide the base of the url.
0104     QString baseUrl = d->doc->link();
0105     if (url.startsWith(QLatin1Char('/')) || baseUrl.endsWith(QLatin1Char('/'))) {
0106         return baseUrl + url;
0107     } else {
0108         return baseUrl + QLatin1Char('/') + url;
0109     }
0110 }
0111 
0112 QString Item::description() const
0113 {
0114     if (!d->doc) {
0115         return originalDescription();
0116     }
0117 
0118     bool isCDATA = false;
0119     bool containsMarkup = false;
0120     d->doc->getItemDescriptionFormatInfo(&isCDATA, &containsMarkup);
0121 
0122     return normalize(originalDescription(), isCDATA, containsMarkup);
0123 }
0124 
0125 QString Item::content() const
0126 {
0127     // parse encoded stuff from content:encoded, xhtml:body and friends into content
0128     return extractContent(*this);
0129 }
0130 
0131 QList<Category> Item::categories() const
0132 {
0133     const QList<QDomElement> cats = elementsByTagNameNS(QString(), QStringLiteral("category"));
0134 
0135     QList<Category> categories;
0136     categories.reserve(cats.count());
0137 
0138     std::transform(cats.cbegin(), cats.cend(), std::back_inserter(categories), [](const QDomElement &element) {
0139         return Category(element);
0140     });
0141 
0142     return categories;
0143 }
0144 
0145 QString Item::comments() const
0146 {
0147     return extractElementTextNS(QString(), QStringLiteral("comments"));
0148 }
0149 
0150 QString Item::author() const
0151 {
0152     QString a = extractElementTextNS(QString(), QStringLiteral("author"));
0153     if (!a.isNull()) {
0154         return a;
0155     } else {
0156         // if author is not available, fall back to dc:creator
0157         return extractElementTextNS(dublinCoreNamespace(), QStringLiteral("creator"));
0158     }
0159 }
0160 
0161 QList<Enclosure> Item::enclosures() const
0162 {
0163     const QList<QDomElement> encs = elementsByTagNameNS(QString(), QStringLiteral("enclosure"));
0164 
0165     QList<Enclosure> enclosures;
0166     enclosures.reserve(encs.count());
0167 
0168     std::transform(encs.cbegin(), encs.cend(), std::back_inserter(enclosures), [](const QDomElement &element) {
0169         return Enclosure(element);
0170     });
0171 
0172     return enclosures;
0173 }
0174 
0175 QString Item::guid() const
0176 {
0177     return extractElementTextNS(QString(), QStringLiteral("guid"));
0178 }
0179 
0180 bool Item::guidIsPermaLink() const
0181 {
0182     bool guidIsPermaLink = true; // true is default
0183 
0184     QDomElement guidNode = firstElementByTagNameNS(QString(), QStringLiteral("guid"));
0185     if (!guidNode.isNull()) {
0186         if (guidNode.attribute(QStringLiteral("isPermaLink")) == QLatin1String("false")) {
0187             guidIsPermaLink = false;
0188         }
0189     }
0190 
0191     return guidIsPermaLink;
0192 }
0193 
0194 time_t Item::pubDate() const
0195 {
0196     QString str = extractElementTextNS(QString(), QStringLiteral("pubDate"));
0197 
0198     if (!str.isNull()) {
0199         return parseDate(str, RFCDate);
0200     }
0201 
0202     // if there is no pubDate, check for dc:date
0203     str = extractElementTextNS(dublinCoreNamespace(), QStringLiteral("date"));
0204     return parseDate(str, ISODate);
0205 }
0206 
0207 time_t Item::expirationDate() const
0208 {
0209     QString str = extractElementTextNS(QString(), QStringLiteral("expirationDate"));
0210     return parseDate(str, RFCDate);
0211 }
0212 
0213 Source Item::source() const
0214 {
0215     return Source(firstElementByTagNameNS(QString(), QStringLiteral("source")));
0216 }
0217 
0218 QString Item::rating() const
0219 {
0220     return extractElementTextNS(QString(), QStringLiteral("rating"));
0221 }
0222 
0223 QString Item::debugInfo() const
0224 {
0225     QString info = QLatin1String("### Item: ###################\n");
0226     if (!title().isNull()) {
0227         info += QLatin1String("title: #") + title() + QLatin1String("#\n");
0228     }
0229     if (!link().isNull()) {
0230         info += QLatin1String("link: #") + link() + QLatin1String("#\n");
0231     }
0232     if (!description().isNull()) {
0233         info += QLatin1String("description: #") + description() + QLatin1String("#\n");
0234     }
0235     if (!content().isNull()) {
0236         info += QLatin1String("content: #") + content() + QLatin1String("#\n");
0237     }
0238     if (!author().isNull()) {
0239         info += QLatin1String("author: #") + author() + QLatin1String("#\n");
0240     }
0241     if (!comments().isNull()) {
0242         info += QLatin1String("comments: #") + comments() + QLatin1String("#\n");
0243     }
0244     QString dpubdate = dateTimeToString(pubDate());
0245     if (!dpubdate.isNull()) {
0246         info += QLatin1String("pubDate: #") + dpubdate + QLatin1String("#\n");
0247     }
0248     if (!guid().isNull()) {
0249         info += QLatin1String("guid: #") + guid() + QLatin1String("#\n");
0250     }
0251     if (guidIsPermaLink()) {
0252         info += QLatin1String("guid is PL: #true#\n");
0253     }
0254     if (!source().isNull()) {
0255         info += source().debugInfo();
0256     }
0257 
0258     const QList<Category> cats = categories();
0259     for (const auto &c : cats) {
0260         info += c.debugInfo();
0261     }
0262 
0263     const QList<Enclosure> encs = enclosures();
0264     for (const auto &e : encs) {
0265         info += e.debugInfo();
0266     }
0267 
0268     info += QLatin1String("### Item end ################\n");
0269     return info;
0270 }
0271 
0272 QList<QDomElement> Item::unhandledElements() const
0273 {
0274     // TODO: do not hardcode this list here
0275     static std::vector<ElementType> handled; // QVector would require a default ctor, and ElementType is too big for QList
0276     if (handled.empty()) {
0277         handled.reserve(11);
0278         handled.push_back(ElementType(QStringLiteral("title")));
0279         handled.push_back(ElementType(QStringLiteral("link")));
0280         handled.push_back(ElementType(QStringLiteral("description")));
0281         handled.push_back(ElementType(QStringLiteral("pubDate")));
0282         handled.push_back(ElementType(QStringLiteral("expirationDate")));
0283         handled.push_back(ElementType(QStringLiteral("rating")));
0284         handled.push_back(ElementType(QStringLiteral("source")));
0285         handled.push_back(ElementType(QStringLiteral("guid")));
0286         handled.push_back(ElementType(QStringLiteral("comments")));
0287         handled.push_back(ElementType(QStringLiteral("author")));
0288         handled.push_back(ElementType(QStringLiteral("date"), dublinCoreNamespace()));
0289     }
0290 
0291     QList<QDomElement> notHandled;
0292 
0293     QDomNodeList children = element().childNodes();
0294     const int numChildren = children.size();
0295     for (int i = 0; i < numChildren; ++i) {
0296         QDomElement el = children.at(i).toElement();
0297         if (!el.isNull() //
0298             && std::find(handled.cbegin(), handled.cend(), ElementType(el.localName(), el.namespaceURI())) == handled.cend()) {
0299             notHandled.append(el);
0300         }
0301     }
0302 
0303     return notHandled;
0304 }
0305 
0306 bool Item::accept(SpecificItemVisitor *visitor)
0307 {
0308     return visitor->visitRSS2Item(this);
0309 }
0310 
0311 } // namespace RSS2
0312 } // namespace Syndication