File indexing completed on 2024-04-28 05:11:00
0001 /* 0002 This file is part of Akregator. 0003 0004 SPDX-FileCopyrightText: 2004 Stanislav Karchebny <Stanislav.Karchebny@kdemail.net> 0005 SPDX-FileCopyrightText: 2005 Frank Osterfeld <osterfeld@kde.org> 0006 0007 SPDX-License-Identifier: GPL-2.0-or-later WITH Qt-Commercial-exception-1.0 0008 */ 0009 0010 #include "article.h" 0011 #include "feed.h" 0012 #include "shared.h" 0013 #include "storage/feedstorage.h" 0014 #include "storage/storage.h" 0015 #include "utils.h" 0016 0017 #include <Syndication/Syndication> 0018 0019 #include <QDateTime> 0020 #include <QList> 0021 #include <QRegularExpression> 0022 #include <qdom.h> 0023 0024 #include <QUrl> 0025 #include <cassert> 0026 0027 using namespace Syndication; 0028 0029 namespace 0030 { 0031 QString buildTitle(const QString &description) 0032 { 0033 QString s = description; 0034 if (description.trimmed().isEmpty()) { 0035 return {}; 0036 } 0037 0038 int i = s.indexOf(QLatin1Char('>'), 500); /*avoid processing too much */ 0039 if (i != -1) { 0040 s = s.left(i + 1); 0041 } 0042 const QRegularExpression rx(QStringLiteral("(<([^\\s>]*)(?:[^>]*)>)[^<]*")); 0043 int offset = 0; 0044 QRegularExpressionMatch rmatch; 0045 // We get the opening tag (e.g. <i>) in one iteration and then the closing 0046 // tag (e.g. </i>), in the next one. Note that <br> doesn't have a closing tag 0047 while (s.indexOf(rx, offset, &rmatch) != -1) { 0048 const QString tagName = rmatch.captured(2); 0049 QString toReplace; 0050 QString replaceWith; 0051 int repStart = 0; 0052 if (tagName.compare(QLatin1StringView("script"), Qt::CaseInsensitive) == 0) { 0053 // E.g.: <script foo="bar">some js here</script> 0054 // strip tag AND tag contents 0055 toReplace = rmatch.captured(0); 0056 repStart = rmatch.capturedStart(0); 0057 } else if (tagName.startsWith(QLatin1StringView("br"), Qt::CaseInsensitive)) { 0058 toReplace = rmatch.captured(1); 0059 repStart = rmatch.capturedStart(1); 0060 replaceWith = QLatin1Char(' '); 0061 } else { 0062 // Any other tag, <i>text</i> ... etc 0063 toReplace = rmatch.captured(1); // strip just tag 0064 repStart = rmatch.capturedStart(1); 0065 } 0066 s.replace(repStart, toReplace.length(), replaceWith); // do the deed 0067 offset = repStart + replaceWith.length(); 0068 } 0069 if (s.length() > 90) { 0070 s = s.left(90) + QLatin1StringView("..."); 0071 } 0072 return s.simplified(); 0073 } 0074 } 0075 0076 namespace Akregator 0077 { 0078 struct Article::Private : public Shared { 0079 Private(); 0080 Private(const QString &guid, Feed *feed, Backend::FeedStorage *archive); 0081 Private(const ItemPtr &article, Feed *feed, Backend::FeedStorage *archive); 0082 0083 /** The status of the article is stored in an int, the bits having the 0084 following meaning: 0085 0086 0000 0001 Deleted 0087 0000 0010 Trash 0088 0000 0100 New 0089 0000 1000 Read 0090 0001 0000 Keep 0091 */ 0092 enum Status { Deleted = 0x01, Trash = 0x02, New = 0x04, Read = 0x08, Keep = 0x10 }; 0093 0094 Feed *feed = nullptr; 0095 QString guid; 0096 Backend::FeedStorage *archive = nullptr; 0097 int status; 0098 uint hash; 0099 QDateTime pubDate; 0100 QString title; // Cache the title, for performance 0101 mutable QSharedPointer<const Enclosure> enclosure; 0102 }; 0103 0104 namespace 0105 { 0106 class EnclosureImpl : public Enclosure 0107 { 0108 public: 0109 EnclosureImpl(const QString &url, const QString &type, uint length) 0110 : m_url(url) 0111 , m_type(type) 0112 , m_length(length) 0113 { 0114 } 0115 0116 [[nodiscard]] QString url() const override 0117 { 0118 return m_url; 0119 } 0120 0121 [[nodiscard]] QString type() const override 0122 { 0123 return m_type; 0124 } 0125 0126 [[nodiscard]] QString title() const override 0127 { 0128 return m_title; 0129 } 0130 0131 [[nodiscard]] uint length() const override 0132 { 0133 return m_length; 0134 } 0135 0136 [[nodiscard]] uint duration() const override 0137 { 0138 return 0; 0139 } 0140 0141 [[nodiscard]] bool isNull() const override 0142 { 0143 return m_url.isNull(); 0144 } 0145 0146 private: 0147 const QString m_url; 0148 const QString m_type; 0149 const QString m_title; // TODO undefined. 0150 const uint m_length; 0151 }; 0152 } 0153 0154 Article::Private::Private() 0155 : feed(nullptr) 0156 , archive(nullptr) 0157 , status(0) 0158 , hash(0) 0159 , pubDate(QDateTime::fromSecsSinceEpoch(1)) 0160 { 0161 } 0162 0163 Article::Private::Private(const QString &guid_, Feed *feed_, Backend::FeedStorage *archive_) 0164 : feed(feed_) 0165 , guid(guid_) 0166 , archive(archive_) 0167 { 0168 archive->article(guid, hash, title, status, pubDate); 0169 } 0170 0171 Article::Private::Private(const ItemPtr &article, Feed *feed_, Backend::FeedStorage *archive_) 0172 : feed(feed_) 0173 , archive(archive_) 0174 , status(New) 0175 , hash(0) 0176 { 0177 Q_ASSERT(archive); 0178 const QList<PersonPtr> authorList = article->authors(); 0179 0180 QString author; 0181 0182 const PersonPtr firstAuthor = !authorList.isEmpty() ? authorList.first() : PersonPtr(); 0183 0184 hash = Utils::calcHash(article->title() + article->description() + article->content() + article->link() + author); 0185 0186 guid = article->id(); 0187 0188 if (!archive->contains(guid)) { 0189 archive->addEntry(guid); 0190 0191 archive->setHash(guid, hash); 0192 title = article->title(); 0193 if (title.isEmpty()) { 0194 title = buildTitle(article->description()); 0195 } 0196 archive->setTitle(guid, title); 0197 archive->setContent(guid, article->content()); 0198 archive->setDescription(guid, article->description()); 0199 archive->setLink(guid, article->link()); 0200 archive->setGuidIsPermaLink(guid, false); 0201 archive->setGuidIsHash(guid, guid.startsWith(QLatin1StringView("hash:"))); 0202 const time_t datePublished = article->datePublished(); 0203 if (datePublished > 0) { 0204 pubDate = QDateTime::fromSecsSinceEpoch(datePublished); 0205 } else { 0206 pubDate = QDateTime::currentDateTime(); 0207 } 0208 archive->setPubDate(guid, pubDate); 0209 if (firstAuthor) { 0210 archive->setAuthorName(guid, firstAuthor->name()); 0211 archive->setAuthorUri(guid, firstAuthor->uri()); 0212 archive->setAuthorEMail(guid, firstAuthor->email()); 0213 } 0214 } else { 0215 // always update comments count, as it's not used for hash calculation 0216 if (hash != archive->hash(guid)) { // article is in archive, was it modified? 0217 // if yes, update 0218 pubDate = archive->pubDate(guid); 0219 archive->setHash(guid, hash); 0220 title = article->title(); 0221 if (title.isEmpty()) { 0222 title = buildTitle(article->description()); 0223 } 0224 archive->setTitle(guid, title); 0225 archive->setDescription(guid, article->description()); 0226 archive->setContent(guid, article->content()); 0227 archive->setLink(guid, article->link()); 0228 if (firstAuthor) { 0229 archive->setAuthorName(guid, firstAuthor->name()); 0230 archive->setAuthorUri(guid, firstAuthor->uri()); 0231 archive->setAuthorEMail(guid, firstAuthor->email()); 0232 } 0233 } 0234 } 0235 0236 const QList<EnclosurePtr> encs = article->enclosures(); 0237 if (!encs.isEmpty()) { 0238 archive->setEnclosure(guid, encs[0]->url(), encs[0]->type(), encs[0]->length()); 0239 } 0240 #if 0 // We need additionalProperties for Bug 366487 0241 qDebug() << "article " << article->additionalProperties().count(); 0242 QMapIterator<QString, QDomElement> i(article->additionalProperties()); 0243 while (i.hasNext()) { 0244 i.next(); 0245 QString str; 0246 QTextStream s(&str, QIODevice::WriteOnly); 0247 i.value().save(s, 2); 0248 0249 qDebug() << i.key() << ": " << str; 0250 } 0251 #endif 0252 } 0253 0254 Article::Article() 0255 : d(new Private) 0256 { 0257 } 0258 0259 Article::Article(const QString &guid, Feed *feed, Backend::FeedStorage *archive) 0260 : d() 0261 { 0262 if (!archive) { 0263 archive = feed->storage()->archiveFor(feed->xmlUrl()); 0264 } 0265 d = new Private(guid, feed, archive); 0266 } 0267 0268 Article::Article(const Syndication::ItemPtr &article, Feed *feed) 0269 : d(new Private(article, feed, feed->storage()->archiveFor(feed->xmlUrl()))) 0270 { 0271 } 0272 0273 Article::Article(const Syndication::ItemPtr &article, Backend::FeedStorage *archive) 0274 : d(new Private(article, nullptr, archive)) 0275 { 0276 } 0277 0278 bool Article::isNull() const 0279 { 0280 return d->archive == nullptr; // TODO: use proper null state 0281 } 0282 0283 void Article::offsetPubDate(int secs) 0284 { 0285 d->pubDate = d->pubDate.addSecs(secs); 0286 d->archive->setPubDate(d->guid, d->pubDate); 0287 } 0288 0289 void Article::setDeleted() 0290 { 0291 if (isDeleted()) { 0292 return; 0293 } 0294 0295 setStatus(Read); 0296 d->status = Private::Deleted | Private::Read; 0297 d->archive->setStatus(d->guid, d->status); 0298 d->archive->setDeleted(d->guid); 0299 0300 if (d->feed) { 0301 d->feed->setArticleDeleted(*this); 0302 } 0303 } 0304 0305 bool Article::isDeleted() const 0306 { 0307 return (d->status & Private::Deleted) != 0; 0308 } 0309 0310 Article::Article(const Article &other) 0311 : d(other.d) 0312 { 0313 d->ref(); 0314 } 0315 0316 Article::~Article() 0317 { 0318 if (d->deref()) { 0319 delete d; 0320 d = nullptr; 0321 } 0322 } 0323 0324 Article &Article::operator=(const Article &other) 0325 { 0326 Article copy(other); 0327 swap(copy); 0328 return *this; 0329 } 0330 0331 bool Article::operator<(const Article &other) const 0332 { 0333 return pubDate() > other.pubDate() || (pubDate() == other.pubDate() && guid() < other.guid()); 0334 } 0335 0336 bool Article::operator<=(const Article &other) const 0337 { 0338 return pubDate() > other.pubDate() || *this == other; 0339 } 0340 0341 bool Article::operator>(const Article &other) const 0342 { 0343 return pubDate() < other.pubDate() || (pubDate() == other.pubDate() && guid() > other.guid()); 0344 } 0345 0346 bool Article::operator>=(const Article &other) const 0347 { 0348 return pubDate() > other.pubDate() || *this == other; 0349 } 0350 0351 bool Article::operator==(const Article &other) const 0352 { 0353 return d->guid == other.guid(); 0354 } 0355 0356 bool Article::operator!=(const Article &other) const 0357 { 0358 return d->guid != other.guid(); 0359 } 0360 0361 int Article::status() const 0362 { 0363 if ((d->status & Private::Read) != 0) { 0364 return Read; 0365 } 0366 0367 if ((d->status & Private::New) != 0) { 0368 return New; 0369 } 0370 0371 return Unread; 0372 } 0373 0374 void Article::setStatus(int stat) 0375 { 0376 int oldStatus = status(); 0377 0378 if (oldStatus != stat) { 0379 switch (stat) { 0380 case Read: 0381 d->status = (d->status | Private::Read) & ~Private::New; 0382 break; 0383 case Unread: 0384 d->status = (d->status & ~Private::Read) & ~Private::New; 0385 break; 0386 case New: 0387 d->status = (d->status | Private::New) & ~Private::Read; 0388 break; 0389 } 0390 if (d->archive) { 0391 d->archive->setStatus(d->guid, d->status); 0392 } 0393 if (d->feed) { 0394 d->feed->setArticleChanged(*this, oldStatus, stat != Read); 0395 } 0396 } 0397 } 0398 0399 QString Article::title() const 0400 { 0401 return d->title; 0402 } 0403 0404 QString Article::authorName() const 0405 { 0406 QString str; 0407 if (d->archive) { 0408 str = d->archive->authorName(d->guid); 0409 } 0410 return str; 0411 } 0412 0413 QString Article::authorEMail() const 0414 { 0415 QString str; 0416 if (d->archive) { 0417 str = d->archive->authorEMail(d->guid); 0418 } 0419 return str; 0420 } 0421 0422 QString Article::authorUri() const 0423 { 0424 QString str; 0425 if (d->archive) { 0426 str = d->archive->authorUri(d->guid); 0427 } 0428 return str; 0429 } 0430 0431 QString Article::authorShort() const 0432 { 0433 const QString name = authorName(); 0434 if (!name.isEmpty()) { 0435 return name; 0436 } 0437 const QString email = authorEMail(); 0438 if (!email.isEmpty()) { 0439 return email; 0440 } 0441 const QString uri = authorUri(); 0442 if (!uri.isEmpty()) { 0443 return uri; 0444 } 0445 return {}; 0446 } 0447 0448 QString Article::authorAsHtml() const 0449 { 0450 const QString name = authorName(); 0451 const QString email = authorEMail(); 0452 0453 if (!email.isEmpty()) { 0454 if (!name.isEmpty()) { 0455 return QStringLiteral("<a href=\"mailto:%1\">%2</a>").arg(email, name); 0456 } else { 0457 return QStringLiteral("<a href=\"mailto:%1\">%1</a>").arg(email); 0458 } 0459 } 0460 0461 const QString uri = authorUri(); 0462 if (!name.isEmpty()) { 0463 if (!uri.isEmpty()) { 0464 return QStringLiteral("<a href=\"%1\">%2</a>").arg(uri, name); 0465 } else { 0466 return name; 0467 } 0468 } 0469 0470 if (!uri.isEmpty()) { 0471 return QStringLiteral("<a href=\"%1\">%1</a>").arg(uri); 0472 } 0473 return {}; 0474 } 0475 0476 QUrl Article::link() const 0477 { 0478 return QUrl(d->archive->link(d->guid)); 0479 } 0480 0481 QString Article::description() const 0482 { 0483 return d->archive->description(d->guid); 0484 } 0485 0486 QString Article::content(ContentOption opt) const 0487 { 0488 const QString cnt = d->archive->content(d->guid); 0489 return opt == ContentAndOnlyContent ? cnt : (!cnt.isEmpty() ? cnt : description()); 0490 } 0491 0492 QString Article::guid() const 0493 { 0494 return d->guid; 0495 } 0496 0497 bool Article::guidIsPermaLink() const 0498 { 0499 return d->archive->guidIsPermaLink(d->guid); 0500 } 0501 0502 bool Article::guidIsHash() const 0503 { 0504 return d->archive->guidIsHash(d->guid); 0505 } 0506 0507 uint Article::hash() const 0508 { 0509 return d->hash; 0510 } 0511 0512 bool Article::keep() const 0513 { 0514 return (d->status & Private::Keep) != 0; 0515 } 0516 0517 void Article::setKeep(bool keep) 0518 { 0519 d->status = keep ? (d->status | Private::Keep) : (d->status & ~Private::Keep); 0520 d->archive->setStatus(d->guid, d->status); 0521 if (d->feed) { 0522 d->feed->setArticleChanged(*this); 0523 } 0524 } 0525 0526 Feed *Article::feed() const 0527 { 0528 return d->feed; 0529 } 0530 0531 QDateTime Article::pubDate() const 0532 { 0533 return d->pubDate; 0534 } 0535 0536 QSharedPointer<const Enclosure> Article::enclosure() const 0537 { 0538 if (!d->enclosure) { 0539 QString url; 0540 QString type; 0541 int length; 0542 bool hasEnc; 0543 d->archive->enclosure(d->guid, hasEnc, url, type, length); 0544 if (hasEnc) { 0545 d->enclosure.reset(new EnclosureImpl(url, type, static_cast<uint>(length))); 0546 } else { 0547 d->enclosure.reset(new EnclosureImpl(QString(), QString(), 0)); 0548 } 0549 } 0550 return d->enclosure; 0551 } 0552 } // namespace Akregator