File indexing completed on 2024-09-08 12:24:36
0001 /* 0002 This file is part of the syndication library 0003 SPDX-FileCopyrightText: 2005 Frank Osterfeld <osterfeld@kde.org> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 #include <rss2/category.h> 0009 #include <rss2/cloud.h> 0010 #include <rss2/document.h> 0011 #include <rss2/image.h> 0012 #include <rss2/item.h> 0013 #include <rss2/textinput.h> 0014 0015 #include <constants.h> 0016 #include <documentvisitor.h> 0017 #include <tools.h> 0018 0019 #include <QDomDocument> 0020 #include <QList> 0021 #include <QSet> 0022 #include <QString> 0023 0024 #include <vector> 0025 0026 namespace Syndication 0027 { 0028 namespace RSS2 0029 { 0030 class SYNDICATION_NO_EXPORT Document::DocumentPrivate 0031 { 0032 public: 0033 DocumentPrivate() 0034 : itemDescriptionIsCDATA(false) 0035 , itemDescriptionContainsMarkup(false) 0036 , itemDescGuessed(false) 0037 , itemTitleIsCDATA(false) 0038 , itemTitleContainsMarkup(false) 0039 , itemTitlesGuessed(false) 0040 { 0041 } 0042 mutable bool itemDescriptionIsCDATA; 0043 mutable bool itemDescriptionContainsMarkup; 0044 mutable bool itemDescGuessed; 0045 mutable bool itemTitleIsCDATA; 0046 mutable bool itemTitleContainsMarkup; 0047 mutable bool itemTitlesGuessed; 0048 }; 0049 0050 Document::Document(const QDomElement &element) 0051 : SpecificDocument() 0052 , ElementWrapper(element) 0053 , d(new DocumentPrivate) 0054 { 0055 } 0056 0057 Document Document::fromXML(const QDomDocument &doc) 0058 { 0059 QDomNode channelNode = doc.namedItem(QStringLiteral("rss")).namedItem(QStringLiteral("channel")); 0060 0061 return Document(channelNode.toElement()); 0062 } 0063 0064 Document::Document() 0065 : SpecificDocument() 0066 , ElementWrapper() 0067 , d(new DocumentPrivate) 0068 { 0069 } 0070 0071 Document::Document(const Document &other) 0072 : SpecificDocument(other) 0073 , ElementWrapper(other) 0074 { 0075 d = other.d; 0076 } 0077 0078 Document::~Document() 0079 { 0080 } 0081 0082 Document &Document::operator=(const Document &other) 0083 { 0084 ElementWrapper::operator=(other); 0085 d = other.d; 0086 return *this; 0087 } 0088 bool Document::isValid() const 0089 { 0090 return !isNull(); 0091 } 0092 0093 QString Document::title() const 0094 { 0095 return extractElementTextNS(QString(), QStringLiteral("title")); 0096 } 0097 0098 QString Document::link() const 0099 { 0100 return extractElementTextNS(QString(), QStringLiteral("link")); 0101 } 0102 0103 QString Document::description() const 0104 { 0105 const QString desc = extractElementTextNS(QString(), QStringLiteral("description")); 0106 return normalize(desc); 0107 } 0108 0109 QString Document::language() const 0110 { 0111 const QString lang = extractElementTextNS(QString(), QStringLiteral("language")); 0112 0113 if (!lang.isNull()) { 0114 return lang; 0115 } else { 0116 return extractElementTextNS(dublinCoreNamespace(), QStringLiteral("language")); 0117 } 0118 } 0119 0120 QString Document::copyright() const 0121 { 0122 const QString rights = extractElementTextNS(QString(), QStringLiteral("copyright")); 0123 if (!rights.isNull()) { 0124 return rights; 0125 } else { 0126 // if <copyright> is not provided, use <dc:rights> 0127 return extractElementTextNS(dublinCoreNamespace(), QStringLiteral("rights")); 0128 } 0129 } 0130 0131 QString Document::managingEditor() const 0132 { 0133 return extractElementTextNS(QString(), QStringLiteral("managingEditor")); 0134 } 0135 0136 QString Document::webMaster() const 0137 { 0138 return extractElementTextNS(QString(), QStringLiteral("webMaster")); 0139 } 0140 0141 time_t Document::pubDate() const 0142 { 0143 QString str = extractElementTextNS(QString(), QStringLiteral("pubDate")); 0144 0145 if (!str.isNull()) { 0146 return parseDate(str, RFCDate); 0147 } else { 0148 // if there is no pubDate, check for dc:date 0149 str = extractElementTextNS(dublinCoreNamespace(), QStringLiteral("date")); 0150 return parseDate(str, ISODate); 0151 } 0152 } 0153 0154 time_t Document::lastBuildDate() const 0155 { 0156 const QString str = extractElementTextNS(QString(), QStringLiteral("lastBuildDate")); 0157 0158 return parseDate(str, RFCDate); 0159 } 0160 0161 QList<Category> Document::categories() const 0162 { 0163 const QList<QDomElement> catNodes = elementsByTagNameNS(QString(), QStringLiteral("category")); 0164 0165 QList<Category> categories; 0166 categories.reserve(catNodes.count()); 0167 0168 std::transform(catNodes.cbegin(), catNodes.cend(), std::back_inserter(categories), [](const QDomElement &element) { 0169 return Category(element); 0170 }); 0171 0172 return categories; 0173 } 0174 0175 QString Document::generator() const 0176 { 0177 return extractElementTextNS(QString(), QStringLiteral("generator")); 0178 } 0179 0180 QString Document::docs() const 0181 { 0182 return extractElementTextNS(QString(), QStringLiteral("docs")); 0183 } 0184 0185 Cloud Document::cloud() const 0186 { 0187 return Cloud(firstElementByTagNameNS(QString(), QStringLiteral("cloud"))); 0188 } 0189 0190 int Document::ttl() const 0191 { 0192 bool ok; 0193 int c; 0194 0195 QString text = extractElementTextNS(QString(), QStringLiteral("ttl")); 0196 c = text.toInt(&ok); 0197 return ok ? c : 0; 0198 } 0199 0200 Image Document::image() const 0201 { 0202 return Image(firstElementByTagNameNS(QString(), QStringLiteral("image"))); 0203 } 0204 0205 TextInput Document::textInput() const 0206 { 0207 TextInput ti = TextInput(firstElementByTagNameNS(QString(), QStringLiteral("textInput"))); 0208 0209 if (!ti.isNull()) { 0210 return ti; 0211 } 0212 0213 // Netscape's version of RSS 0.91 has textinput, not textInput 0214 return TextInput(firstElementByTagNameNS(QString(), QStringLiteral("textinput"))); 0215 } 0216 0217 QSet<int> Document::skipHours() const 0218 { 0219 QSet<int> skipHours; 0220 QDomElement skipHoursNode = firstElementByTagNameNS(QString(), QStringLiteral("skipHours")); 0221 if (!skipHoursNode.isNull()) { 0222 ElementWrapper skipHoursWrapper(skipHoursNode); 0223 bool ok = false; 0224 const QList<QDomElement> hours = skipHoursWrapper.elementsByTagNameNS(QString(), QStringLiteral("hour")); 0225 for (const auto &element : hours) { 0226 const int h = element.text().toInt(&ok); 0227 if (ok) { 0228 skipHours.insert(h); 0229 } 0230 } 0231 } 0232 0233 return skipHours; 0234 } 0235 0236 QSet<Document::DayOfWeek> Document::skipDays() const 0237 { 0238 QSet<DayOfWeek> skipDays; 0239 QDomElement skipDaysNode = firstElementByTagNameNS(QString(), QStringLiteral("skipDays")); 0240 if (!skipDaysNode.isNull()) { 0241 ElementWrapper skipDaysWrapper(skipDaysNode); 0242 struct DayInfo { 0243 QLatin1String name; 0244 DayOfWeek enumValue; 0245 }; 0246 static const std::vector<DayInfo> weekDays = { 0247 {QLatin1String("Monday"), Monday}, 0248 {QLatin1String("Tuesday"), Tuesday}, 0249 {QLatin1String("Wednesday"), Wednesday}, 0250 {QLatin1String("Thursday"), Thursday}, 0251 {QLatin1String("Friday"), Friday}, 0252 {QLatin1String("Saturday"), Saturday}, 0253 {QLatin1String("Sunday"), Sunday}, 0254 }; 0255 0256 const QList<QDomElement> days = skipDaysWrapper.elementsByTagNameNS(QString(), QStringLiteral("day")); 0257 for (const auto &element : days) { 0258 const QString day = element.text(); 0259 auto it = std::find_if(weekDays.cbegin(), weekDays.cend(), [&day](const DayInfo &info) { 0260 return info.name == day; 0261 }); 0262 if (it != weekDays.cend()) { 0263 skipDays.insert(it->enumValue); 0264 } 0265 } 0266 } 0267 0268 return skipDays; 0269 } 0270 0271 QList<Item> Document::items() const 0272 { 0273 const QList<QDomElement> itemNodes = elementsByTagNameNS(QString(), QStringLiteral("item")); 0274 0275 QList<Item> items; 0276 items.reserve(itemNodes.count()); 0277 0278 DocumentPtr doccpy(new Document(*this)); 0279 0280 std::transform(itemNodes.cbegin(), itemNodes.cend(), std::back_inserter(items), [&doccpy](const QDomElement &element) { 0281 return Item(element, doccpy); 0282 }); 0283 0284 return items; 0285 } 0286 QList<QDomElement> Document::unhandledElements() const 0287 { 0288 // TODO: do not hardcode this list here 0289 static std::vector<ElementType> handled; // QVector would require a default ctor, and ElementType is too big for QList 0290 if (handled.empty()) { 0291 handled.reserve(22); 0292 handled.push_back(ElementType(QStringLiteral("title"))); 0293 handled.push_back(ElementType(QStringLiteral("link"))); 0294 handled.push_back(ElementType(QStringLiteral("description"))); 0295 handled.push_back(ElementType(QStringLiteral("language"))); 0296 handled.push_back(ElementType(QStringLiteral("copyright"))); 0297 handled.push_back(ElementType(QStringLiteral("managingEditor"))); 0298 handled.push_back(ElementType(QStringLiteral("webMaster"))); 0299 handled.push_back(ElementType(QStringLiteral("pubDate"))); 0300 handled.push_back(ElementType(QStringLiteral("lastBuildDate"))); 0301 handled.push_back(ElementType(QStringLiteral("skipDays"))); 0302 handled.push_back(ElementType(QStringLiteral("skipHours"))); 0303 handled.push_back(ElementType(QStringLiteral("item"))); 0304 handled.push_back(ElementType(QStringLiteral("textinput"))); 0305 handled.push_back(ElementType(QStringLiteral("textInput"))); 0306 handled.push_back(ElementType(QStringLiteral("image"))); 0307 handled.push_back(ElementType(QStringLiteral("ttl"))); 0308 handled.push_back(ElementType(QStringLiteral("generator"))); 0309 handled.push_back(ElementType(QStringLiteral("docs"))); 0310 handled.push_back(ElementType(QStringLiteral("cloud"))); 0311 handled.push_back(ElementType(QStringLiteral("language"), dublinCoreNamespace())); 0312 handled.push_back(ElementType(QStringLiteral("rights"), dublinCoreNamespace())); 0313 handled.push_back(ElementType(QStringLiteral("date"), dublinCoreNamespace())); 0314 } 0315 0316 QList<QDomElement> notHandled; 0317 0318 QDomNodeList children = element().childNodes(); 0319 const int numChildren = children.size(); 0320 for (int i = 0; i < numChildren; ++i) { 0321 QDomElement el = children.at(i).toElement(); 0322 if (!el.isNull() // 0323 && std::find(handled.cbegin(), handled.cend(), ElementType(el.localName(), el.namespaceURI())) == handled.cend()) { 0324 notHandled.append(el); 0325 } 0326 } 0327 0328 return notHandled; 0329 } 0330 0331 QString Document::debugInfo() const 0332 { 0333 QString info; 0334 info += QLatin1String("### Document: ###################\n"); 0335 if (!title().isNull()) { 0336 info += QLatin1String("title: #") + title() + QLatin1String("#\n"); 0337 } 0338 if (!description().isNull()) { 0339 info += QLatin1String("description: #") + description() + QLatin1String("#\n"); 0340 } 0341 if (!link().isNull()) { 0342 info += QLatin1String("link: #") + link() + QLatin1String("#\n"); 0343 } 0344 if (!language().isNull()) { 0345 info += QLatin1String("language: #") + language() + QLatin1String("#\n"); 0346 } 0347 if (!copyright().isNull()) { 0348 info += QLatin1String("copyright: #") + copyright() + QLatin1String("#\n"); 0349 } 0350 if (!managingEditor().isNull()) { 0351 info += QLatin1String("managingEditor: #") + managingEditor() + QLatin1String("#\n"); 0352 } 0353 if (!webMaster().isNull()) { 0354 info += QLatin1String("webMaster: #") + webMaster() + QLatin1String("#\n"); 0355 } 0356 0357 QString dpubdate = dateTimeToString(pubDate()); 0358 if (!dpubdate.isNull()) { 0359 info += QLatin1String("pubDate: #") + dpubdate + QLatin1String("#\n"); 0360 } 0361 0362 QString dlastbuilddate = dateTimeToString(lastBuildDate()); 0363 if (!dlastbuilddate.isNull()) { 0364 info += QLatin1String("lastBuildDate: #") + dlastbuilddate + QLatin1String("#\n"); 0365 } 0366 0367 if (!textInput().isNull()) { 0368 info += textInput().debugInfo(); 0369 } 0370 if (!cloud().isNull()) { 0371 info += cloud().debugInfo(); 0372 } 0373 if (!image().isNull()) { 0374 info += image().debugInfo(); 0375 } 0376 0377 const QList<Category> cats = categories(); 0378 0379 for (const auto &c : cats) { 0380 info += c.debugInfo(); 0381 } 0382 0383 const QList<Item> litems = items(); 0384 for (const auto &item : litems) { 0385 info += item.debugInfo(); 0386 } 0387 info += QLatin1String("### Document end ################\n"); 0388 return info; 0389 } 0390 0391 void Document::getItemTitleFormatInfo(bool *isCDATA, bool *containsMarkup) const 0392 { 0393 if (!d->itemTitlesGuessed) { 0394 QString titles; 0395 QList<Item> litems = items(); 0396 0397 if (litems.isEmpty()) { 0398 d->itemTitlesGuessed = true; 0399 return; 0400 } 0401 0402 QDomElement titleEl = (*litems.begin()).firstElementByTagNameNS(QString(), QStringLiteral("title")); 0403 d->itemTitleIsCDATA = titleEl.firstChild().isCDATASection(); 0404 0405 const int nmax = std::min<int>(litems.size(), 10); // we check a maximum of 10 items 0406 int i = 0; 0407 0408 for (const auto &item : litems) { 0409 if (i++ >= nmax) { 0410 break; 0411 } 0412 titles += item.originalTitle(); 0413 } 0414 0415 d->itemTitleContainsMarkup = stringContainsMarkup(titles); 0416 d->itemTitlesGuessed = true; 0417 } 0418 0419 if (isCDATA != nullptr) { 0420 *isCDATA = d->itemTitleIsCDATA; 0421 } 0422 if (containsMarkup != nullptr) { 0423 *containsMarkup = d->itemTitleContainsMarkup; 0424 } 0425 } 0426 0427 void Document::getItemDescriptionFormatInfo(bool *isCDATA, bool *containsMarkup) const 0428 { 0429 if (!d->itemDescGuessed) { 0430 QString desc; 0431 QList<Item> litems = items(); 0432 0433 if (litems.isEmpty()) { 0434 d->itemDescGuessed = true; 0435 return; 0436 } 0437 0438 QDomElement descEl = (*litems.begin()).firstElementByTagNameNS(QString(), QStringLiteral("description")); 0439 d->itemDescriptionIsCDATA = descEl.firstChild().isCDATASection(); 0440 0441 const int nmax = std::min<int>(litems.size(), 10); // we check a maximum of 10 items 0442 int i = 0; 0443 0444 for (const auto &item : litems) { 0445 if (i++ >= nmax) { 0446 break; 0447 } 0448 desc += item.originalDescription(); 0449 } 0450 0451 d->itemDescriptionContainsMarkup = stringContainsMarkup(desc); 0452 d->itemDescGuessed = true; 0453 } 0454 0455 if (isCDATA != nullptr) { 0456 *isCDATA = d->itemDescriptionIsCDATA; 0457 } 0458 if (containsMarkup != nullptr) { 0459 *containsMarkup = d->itemDescriptionContainsMarkup; 0460 } 0461 } 0462 0463 bool Document::accept(DocumentVisitor *visitor) 0464 { 0465 return visitor->visitRSS2Document(this); 0466 } 0467 0468 } // namespace RSS2 0469 } // namespace Syndication