// File indexing completed on 2024-12-01 12:41:52
0001 /* 0002 This file is part of the syndication library 0003 SPDX-FileCopyrightText: 2005 Frank Osterfeld <osterfeld@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 #include <constants.h> 0009 #include <rss2/category.h> 0010 #include <rss2/enclosure.h> 0011 #include <rss2/item.h> 0012 #include <rss2/source.h> 0013 #include <rss2/tools_p.h> 0014 #include <specificitem.h> 0015 #include <specificitemvisitor.h> 0016 #include <tools.h> 0017 0018 #include <QDomElement> 0019 #include <QList> 0020 #include <QString> 0021 0022 #include <vector> 0023 0024 namespace Syndication 0025 { 0026 namespace RSS2 0027 { 0028 class SYNDICATION_NO_EXPORT Item::ItemPrivate 0029 { 0030 public: 0031 QSharedPointer<Document> doc; 0032 }; 0033 0034 Item::Item(QSharedPointer<Document> doc) 0035 : ElementWrapper() 0036 , d(new ItemPrivate) 0037 { 0038 d->doc = doc; 0039 } 0040 0041 Item::Item(const QDomElement &element, QSharedPointer<Document> doc) 0042 : ElementWrapper(element) 0043 , d(new ItemPrivate) 0044 { 0045 d->doc = doc; 0046 } 0047 0048 Item::~Item() 0049 { 0050 } 0051 0052 Item::Item(const Item &other) 0053 : ElementWrapper(other) 0054 , SpecificItem(other) 0055 { 0056 d = other.d; 0057 } 0058 0059 Item &Item::operator=(const Item &other) 0060 { 0061 ElementWrapper::operator=(other); 0062 SpecificItem::operator=(other); 0063 d = other.d; 0064 return *this; 0065 } 0066 0067 QString Item::title() const 0068 { 0069 if (!d->doc) { 0070 return originalTitle(); 0071 } 0072 0073 bool isCDATA = false; 0074 bool containsMarkup = false; 0075 d->doc->getItemTitleFormatInfo(&isCDATA, &containsMarkup); 0076 0077 return normalize(originalTitle(), isCDATA, containsMarkup); 0078 } 0079 0080 QString Item::originalDescription() const 0081 { 0082 return extractElementTextNS(QString(), QStringLiteral("description")); 0083 } 0084 0085 QString Item::originalTitle() const 0086 { 0087 return extractElementTextNS(QString(), QStringLiteral("title")); 0088 } 0089 0090 
QString Item::link() const 0091 { 0092 QString url = extractElementTextNS(QString(), QStringLiteral("link")); 0093 if (url.startsWith(QLatin1String("http://")) || url.startsWith(QLatin1String("https://"))) { 0094 return url; 0095 } 0096 if (url.isEmpty()) { 0097 return QString(); 0098 } 0099 if (d->doc->link().isEmpty()) { 0100 return url; 0101 } 0102 // link does not look like a complete url, assume the feed author expects 0103 // the doc link to provide the base of the url. 0104 QString baseUrl = d->doc->link(); 0105 if (url.startsWith(QLatin1Char('/')) || baseUrl.endsWith(QLatin1Char('/'))) { 0106 return baseUrl + url; 0107 } else { 0108 return baseUrl + QLatin1Char('/') + url; 0109 } 0110 } 0111 0112 QString Item::description() const 0113 { 0114 if (!d->doc) { 0115 return originalDescription(); 0116 } 0117 0118 bool isCDATA = false; 0119 bool containsMarkup = false; 0120 d->doc->getItemDescriptionFormatInfo(&isCDATA, &containsMarkup); 0121 0122 return normalize(originalDescription(), isCDATA, containsMarkup); 0123 } 0124 0125 QString Item::content() const 0126 { 0127 // parse encoded stuff from content:encoded, xhtml:body and friends into content 0128 return extractContent(*this); 0129 } 0130 0131 QList<Category> Item::categories() const 0132 { 0133 const QList<QDomElement> cats = elementsByTagNameNS(QString(), QStringLiteral("category")); 0134 0135 QList<Category> categories; 0136 categories.reserve(cats.count()); 0137 0138 std::transform(cats.cbegin(), cats.cend(), std::back_inserter(categories), [](const QDomElement &element) { 0139 return Category(element); 0140 }); 0141 0142 return categories; 0143 } 0144 0145 QString Item::comments() const 0146 { 0147 return extractElementTextNS(QString(), QStringLiteral("comments")); 0148 } 0149 0150 QString Item::author() const 0151 { 0152 QString a = extractElementTextNS(QString(), QStringLiteral("author")); 0153 if (!a.isNull()) { 0154 return a; 0155 } else { 0156 // if author is not available, fall back to dc:creator 
0157 return extractElementTextNS(dublinCoreNamespace(), QStringLiteral("creator")); 0158 } 0159 } 0160 0161 QList<Enclosure> Item::enclosures() const 0162 { 0163 const QList<QDomElement> encs = elementsByTagNameNS(QString(), QStringLiteral("enclosure")); 0164 0165 QList<Enclosure> enclosures; 0166 enclosures.reserve(encs.count()); 0167 0168 std::transform(encs.cbegin(), encs.cend(), std::back_inserter(enclosures), [](const QDomElement &element) { 0169 return Enclosure(element); 0170 }); 0171 0172 return enclosures; 0173 } 0174 0175 QString Item::guid() const 0176 { 0177 return extractElementTextNS(QString(), QStringLiteral("guid")); 0178 } 0179 0180 bool Item::guidIsPermaLink() const 0181 { 0182 bool guidIsPermaLink = true; // true is default 0183 0184 QDomElement guidNode = firstElementByTagNameNS(QString(), QStringLiteral("guid")); 0185 if (!guidNode.isNull()) { 0186 if (guidNode.attribute(QStringLiteral("isPermaLink")) == QLatin1String("false")) { 0187 guidIsPermaLink = false; 0188 } 0189 } 0190 0191 return guidIsPermaLink; 0192 } 0193 0194 time_t Item::pubDate() const 0195 { 0196 QString str = extractElementTextNS(QString(), QStringLiteral("pubDate")); 0197 0198 if (!str.isNull()) { 0199 return parseDate(str, RFCDate); 0200 } 0201 0202 // if there is no pubDate, check for dc:date 0203 str = extractElementTextNS(dublinCoreNamespace(), QStringLiteral("date")); 0204 return parseDate(str, ISODate); 0205 } 0206 0207 time_t Item::expirationDate() const 0208 { 0209 QString str = extractElementTextNS(QString(), QStringLiteral("expirationDate")); 0210 return parseDate(str, RFCDate); 0211 } 0212 0213 Source Item::source() const 0214 { 0215 return Source(firstElementByTagNameNS(QString(), QStringLiteral("source"))); 0216 } 0217 0218 QString Item::rating() const 0219 { 0220 return extractElementTextNS(QString(), QStringLiteral("rating")); 0221 } 0222 0223 QString Item::debugInfo() const 0224 { 0225 QString info = QLatin1String("### Item: ###################\n"); 0226 if 
(!title().isNull()) { 0227 info += QLatin1String("title: #") + title() + QLatin1String("#\n"); 0228 } 0229 if (!link().isNull()) { 0230 info += QLatin1String("link: #") + link() + QLatin1String("#\n"); 0231 } 0232 if (!description().isNull()) { 0233 info += QLatin1String("description: #") + description() + QLatin1String("#\n"); 0234 } 0235 if (!content().isNull()) { 0236 info += QLatin1String("content: #") + content() + QLatin1String("#\n"); 0237 } 0238 if (!author().isNull()) { 0239 info += QLatin1String("author: #") + author() + QLatin1String("#\n"); 0240 } 0241 if (!comments().isNull()) { 0242 info += QLatin1String("comments: #") + comments() + QLatin1String("#\n"); 0243 } 0244 QString dpubdate = dateTimeToString(pubDate()); 0245 if (!dpubdate.isNull()) { 0246 info += QLatin1String("pubDate: #") + dpubdate + QLatin1String("#\n"); 0247 } 0248 if (!guid().isNull()) { 0249 info += QLatin1String("guid: #") + guid() + QLatin1String("#\n"); 0250 } 0251 if (guidIsPermaLink()) { 0252 info += QLatin1String("guid is PL: #true#\n"); 0253 } 0254 if (!source().isNull()) { 0255 info += source().debugInfo(); 0256 } 0257 0258 const QList<Category> cats = categories(); 0259 for (const auto &c : cats) { 0260 info += c.debugInfo(); 0261 } 0262 0263 const QList<Enclosure> encs = enclosures(); 0264 for (const auto &e : encs) { 0265 info += e.debugInfo(); 0266 } 0267 0268 info += QLatin1String("### Item end ################\n"); 0269 return info; 0270 } 0271 0272 QList<QDomElement> Item::unhandledElements() const 0273 { 0274 // TODO: do not hardcode this list here 0275 static std::vector<ElementType> handled; // QVector would require a default ctor, and ElementType is too big for QList 0276 if (handled.empty()) { 0277 handled.reserve(11); 0278 handled.push_back(ElementType(QStringLiteral("title"))); 0279 handled.push_back(ElementType(QStringLiteral("link"))); 0280 handled.push_back(ElementType(QStringLiteral("description"))); 0281 
handled.push_back(ElementType(QStringLiteral("pubDate"))); 0282 handled.push_back(ElementType(QStringLiteral("expirationDate"))); 0283 handled.push_back(ElementType(QStringLiteral("rating"))); 0284 handled.push_back(ElementType(QStringLiteral("source"))); 0285 handled.push_back(ElementType(QStringLiteral("guid"))); 0286 handled.push_back(ElementType(QStringLiteral("comments"))); 0287 handled.push_back(ElementType(QStringLiteral("author"))); 0288 handled.push_back(ElementType(QStringLiteral("date"), dublinCoreNamespace())); 0289 } 0290 0291 QList<QDomElement> notHandled; 0292 0293 QDomNodeList children = element().childNodes(); 0294 const int numChildren = children.size(); 0295 for (int i = 0; i < numChildren; ++i) { 0296 QDomElement el = children.at(i).toElement(); 0297 if (!el.isNull() // 0298 && std::find(handled.cbegin(), handled.cend(), ElementType(el.localName(), el.namespaceURI())) == handled.cend()) { 0299 notHandled.append(el); 0300 } 0301 } 0302 0303 return notHandled; 0304 } 0305 0306 bool Item::accept(SpecificItemVisitor *visitor) 0307 { 0308 return visitor->visitRSS2Item(this); 0309 } 0310 0311 } // namespace RSS2 0312 } // namespace Syndication