File indexing completed on 2024-04-28 05:11:00

0001 /*
0002     This file is part of Akregator.
0003 
0004     SPDX-FileCopyrightText: 2004 Stanislav Karchebny <Stanislav.Karchebny@kdemail.net>
0005     SPDX-FileCopyrightText: 2005 Frank Osterfeld <osterfeld@kde.org>
0006 
0007     SPDX-License-Identifier: GPL-2.0-or-later WITH Qt-Commercial-exception-1.0
0008 */
0009 
0010 #include "article.h"
0011 #include "feed.h"
0012 #include "shared.h"
0013 #include "storage/feedstorage.h"
0014 #include "storage/storage.h"
0015 #include "utils.h"
0016 
0017 #include <Syndication/Syndication>
0018 
0019 #include <QDateTime>
0020 #include <QList>
0021 #include <QRegularExpression>
0022 #include <qdom.h>
0023 
0024 #include <QUrl>
0025 #include <cassert>
0026 
0027 using namespace Syndication;
0028 
0029 namespace
0030 {
0031 QString buildTitle(const QString &description)
0032 {
0033     QString s = description;
0034     if (description.trimmed().isEmpty()) {
0035         return {};
0036     }
0037 
0038     int i = s.indexOf(QLatin1Char('>'), 500); /*avoid processing too much */
0039     if (i != -1) {
0040         s = s.left(i + 1);
0041     }
0042     const QRegularExpression rx(QStringLiteral("(<([^\\s>]*)(?:[^>]*)>)[^<]*"));
0043     int offset = 0;
0044     QRegularExpressionMatch rmatch;
0045     // We get the opening tag (e.g. <i>) in one iteration and then the closing
0046     // tag (e.g. </i>), in the next one. Note that <br> doesn't have a closing tag
0047     while (s.indexOf(rx, offset, &rmatch) != -1) {
0048         const QString tagName = rmatch.captured(2);
0049         QString toReplace;
0050         QString replaceWith;
0051         int repStart = 0;
0052         if (tagName.compare(QLatin1StringView("script"), Qt::CaseInsensitive) == 0) {
0053             // E.g.: <script foo="bar">some js here</script>
0054             // strip tag AND tag contents
0055             toReplace = rmatch.captured(0);
0056             repStart = rmatch.capturedStart(0);
0057         } else if (tagName.startsWith(QLatin1StringView("br"), Qt::CaseInsensitive)) {
0058             toReplace = rmatch.captured(1);
0059             repStart = rmatch.capturedStart(1);
0060             replaceWith = QLatin1Char(' ');
0061         } else {
0062             // Any other tag, <i>text</i> ... etc
0063             toReplace = rmatch.captured(1); // strip just tag
0064             repStart = rmatch.capturedStart(1);
0065         }
0066         s.replace(repStart, toReplace.length(), replaceWith); // do the deed
0067         offset = repStart + replaceWith.length();
0068     }
0069     if (s.length() > 90) {
0070         s = s.left(90) + QLatin1StringView("...");
0071     }
0072     return s.simplified();
0073 }
0074 }
0075 
0076 namespace Akregator
0077 {
0078 struct Article::Private : public Shared {
0079     Private();
0080     Private(const QString &guid, Feed *feed, Backend::FeedStorage *archive);
0081     Private(const ItemPtr &article, Feed *feed, Backend::FeedStorage *archive);
0082 
0083     /** The status of the article is stored in an int, the bits having the
0084         following meaning:
0085 
0086         0000 0001 Deleted
0087         0000 0010 Trash
0088         0000 0100 New
0089         0000 1000 Read
0090         0001 0000 Keep
0091      */
0092     enum Status { Deleted = 0x01, Trash = 0x02, New = 0x04, Read = 0x08, Keep = 0x10 };
0093 
0094     Feed *feed = nullptr;
0095     QString guid;
0096     Backend::FeedStorage *archive = nullptr;
0097     int status;
0098     uint hash;
0099     QDateTime pubDate;
0100     QString title; // Cache the title, for performance
0101     mutable QSharedPointer<const Enclosure> enclosure;
0102 };
0103 
0104 namespace
0105 {
0106 class EnclosureImpl : public Enclosure
0107 {
0108 public:
0109     EnclosureImpl(const QString &url, const QString &type, uint length)
0110         : m_url(url)
0111         , m_type(type)
0112         , m_length(length)
0113     {
0114     }
0115 
0116     [[nodiscard]] QString url() const override
0117     {
0118         return m_url;
0119     }
0120 
0121     [[nodiscard]] QString type() const override
0122     {
0123         return m_type;
0124     }
0125 
0126     [[nodiscard]] QString title() const override
0127     {
0128         return m_title;
0129     }
0130 
0131     [[nodiscard]] uint length() const override
0132     {
0133         return m_length;
0134     }
0135 
0136     [[nodiscard]] uint duration() const override
0137     {
0138         return 0;
0139     }
0140 
0141     [[nodiscard]] bool isNull() const override
0142     {
0143         return m_url.isNull();
0144     }
0145 
0146 private:
0147     const QString m_url;
0148     const QString m_type;
0149     const QString m_title; // TODO undefined.
0150     const uint m_length;
0151 };
0152 }
0153 
0154 Article::Private::Private()
0155     : feed(nullptr)
0156     , archive(nullptr)
0157     , status(0)
0158     , hash(0)
0159     , pubDate(QDateTime::fromSecsSinceEpoch(1))
0160 {
0161 }
0162 
0163 Article::Private::Private(const QString &guid_, Feed *feed_, Backend::FeedStorage *archive_)
0164     : feed(feed_)
0165     , guid(guid_)
0166     , archive(archive_)
0167 {
0168     archive->article(guid, hash, title, status, pubDate);
0169 }
0170 
0171 Article::Private::Private(const ItemPtr &article, Feed *feed_, Backend::FeedStorage *archive_)
0172     : feed(feed_)
0173     , archive(archive_)
0174     , status(New)
0175     , hash(0)
0176 {
0177     Q_ASSERT(archive);
0178     const QList<PersonPtr> authorList = article->authors();
0179 
0180     QString author;
0181 
0182     const PersonPtr firstAuthor = !authorList.isEmpty() ? authorList.first() : PersonPtr();
0183 
0184     hash = Utils::calcHash(article->title() + article->description() + article->content() + article->link() + author);
0185 
0186     guid = article->id();
0187 
0188     if (!archive->contains(guid)) {
0189         archive->addEntry(guid);
0190 
0191         archive->setHash(guid, hash);
0192         title = article->title();
0193         if (title.isEmpty()) {
0194             title = buildTitle(article->description());
0195         }
0196         archive->setTitle(guid, title);
0197         archive->setContent(guid, article->content());
0198         archive->setDescription(guid, article->description());
0199         archive->setLink(guid, article->link());
0200         archive->setGuidIsPermaLink(guid, false);
0201         archive->setGuidIsHash(guid, guid.startsWith(QLatin1StringView("hash:")));
0202         const time_t datePublished = article->datePublished();
0203         if (datePublished > 0) {
0204             pubDate = QDateTime::fromSecsSinceEpoch(datePublished);
0205         } else {
0206             pubDate = QDateTime::currentDateTime();
0207         }
0208         archive->setPubDate(guid, pubDate);
0209         if (firstAuthor) {
0210             archive->setAuthorName(guid, firstAuthor->name());
0211             archive->setAuthorUri(guid, firstAuthor->uri());
0212             archive->setAuthorEMail(guid, firstAuthor->email());
0213         }
0214     } else {
0215         // always update comments count, as it's not used for hash calculation
0216         if (hash != archive->hash(guid)) { // article is in archive, was it modified?
0217             // if yes, update
0218             pubDate = archive->pubDate(guid);
0219             archive->setHash(guid, hash);
0220             title = article->title();
0221             if (title.isEmpty()) {
0222                 title = buildTitle(article->description());
0223             }
0224             archive->setTitle(guid, title);
0225             archive->setDescription(guid, article->description());
0226             archive->setContent(guid, article->content());
0227             archive->setLink(guid, article->link());
0228             if (firstAuthor) {
0229                 archive->setAuthorName(guid, firstAuthor->name());
0230                 archive->setAuthorUri(guid, firstAuthor->uri());
0231                 archive->setAuthorEMail(guid, firstAuthor->email());
0232             }
0233         }
0234     }
0235 
0236     const QList<EnclosurePtr> encs = article->enclosures();
0237     if (!encs.isEmpty()) {
0238         archive->setEnclosure(guid, encs[0]->url(), encs[0]->type(), encs[0]->length());
0239     }
0240 #if 0 // We need additionalProperties for Bug 366487
0241     qDebug() << "article " << article->additionalProperties().count();
0242     QMapIterator<QString, QDomElement> i(article->additionalProperties());
0243     while (i.hasNext()) {
0244         i.next();
0245         QString str;
0246         QTextStream s(&str, QIODevice::WriteOnly);
0247         i.value().save(s, 2);
0248 
0249         qDebug() << i.key() << ": " << str;
0250     }
0251 #endif
0252 }
0253 
0254 Article::Article()
0255     : d(new Private)
0256 {
0257 }
0258 
0259 Article::Article(const QString &guid, Feed *feed, Backend::FeedStorage *archive)
0260     : d()
0261 {
0262     if (!archive) {
0263         archive = feed->storage()->archiveFor(feed->xmlUrl());
0264     }
0265     d = new Private(guid, feed, archive);
0266 }
0267 
0268 Article::Article(const Syndication::ItemPtr &article, Feed *feed)
0269     : d(new Private(article, feed, feed->storage()->archiveFor(feed->xmlUrl())))
0270 {
0271 }
0272 
0273 Article::Article(const Syndication::ItemPtr &article, Backend::FeedStorage *archive)
0274     : d(new Private(article, nullptr, archive))
0275 {
0276 }
0277 
0278 bool Article::isNull() const
0279 {
0280     return d->archive == nullptr; // TODO: use proper null state
0281 }
0282 
0283 void Article::offsetPubDate(int secs)
0284 {
0285     d->pubDate = d->pubDate.addSecs(secs);
0286     d->archive->setPubDate(d->guid, d->pubDate);
0287 }
0288 
0289 void Article::setDeleted()
0290 {
0291     if (isDeleted()) {
0292         return;
0293     }
0294 
0295     setStatus(Read);
0296     d->status = Private::Deleted | Private::Read;
0297     d->archive->setStatus(d->guid, d->status);
0298     d->archive->setDeleted(d->guid);
0299 
0300     if (d->feed) {
0301         d->feed->setArticleDeleted(*this);
0302     }
0303 }
0304 
0305 bool Article::isDeleted() const
0306 {
0307     return (d->status & Private::Deleted) != 0;
0308 }
0309 
0310 Article::Article(const Article &other)
0311     : d(other.d)
0312 {
0313     d->ref();
0314 }
0315 
0316 Article::~Article()
0317 {
0318     if (d->deref()) {
0319         delete d;
0320         d = nullptr;
0321     }
0322 }
0323 
0324 Article &Article::operator=(const Article &other)
0325 {
0326     Article copy(other);
0327     swap(copy);
0328     return *this;
0329 }
0330 
0331 bool Article::operator<(const Article &other) const
0332 {
0333     return pubDate() > other.pubDate() || (pubDate() == other.pubDate() && guid() < other.guid());
0334 }
0335 
0336 bool Article::operator<=(const Article &other) const
0337 {
0338     return pubDate() > other.pubDate() || *this == other;
0339 }
0340 
0341 bool Article::operator>(const Article &other) const
0342 {
0343     return pubDate() < other.pubDate() || (pubDate() == other.pubDate() && guid() > other.guid());
0344 }
0345 
0346 bool Article::operator>=(const Article &other) const
0347 {
0348     return pubDate() > other.pubDate() || *this == other;
0349 }
0350 
0351 bool Article::operator==(const Article &other) const
0352 {
0353     return d->guid == other.guid();
0354 }
0355 
0356 bool Article::operator!=(const Article &other) const
0357 {
0358     return d->guid != other.guid();
0359 }
0360 
0361 int Article::status() const
0362 {
0363     if ((d->status & Private::Read) != 0) {
0364         return Read;
0365     }
0366 
0367     if ((d->status & Private::New) != 0) {
0368         return New;
0369     }
0370 
0371     return Unread;
0372 }
0373 
0374 void Article::setStatus(int stat)
0375 {
0376     int oldStatus = status();
0377 
0378     if (oldStatus != stat) {
0379         switch (stat) {
0380         case Read:
0381             d->status = (d->status | Private::Read) & ~Private::New;
0382             break;
0383         case Unread:
0384             d->status = (d->status & ~Private::Read) & ~Private::New;
0385             break;
0386         case New:
0387             d->status = (d->status | Private::New) & ~Private::Read;
0388             break;
0389         }
0390         if (d->archive) {
0391             d->archive->setStatus(d->guid, d->status);
0392         }
0393         if (d->feed) {
0394             d->feed->setArticleChanged(*this, oldStatus, stat != Read);
0395         }
0396     }
0397 }
0398 
0399 QString Article::title() const
0400 {
0401     return d->title;
0402 }
0403 
0404 QString Article::authorName() const
0405 {
0406     QString str;
0407     if (d->archive) {
0408         str = d->archive->authorName(d->guid);
0409     }
0410     return str;
0411 }
0412 
0413 QString Article::authorEMail() const
0414 {
0415     QString str;
0416     if (d->archive) {
0417         str = d->archive->authorEMail(d->guid);
0418     }
0419     return str;
0420 }
0421 
0422 QString Article::authorUri() const
0423 {
0424     QString str;
0425     if (d->archive) {
0426         str = d->archive->authorUri(d->guid);
0427     }
0428     return str;
0429 }
0430 
0431 QString Article::authorShort() const
0432 {
0433     const QString name = authorName();
0434     if (!name.isEmpty()) {
0435         return name;
0436     }
0437     const QString email = authorEMail();
0438     if (!email.isEmpty()) {
0439         return email;
0440     }
0441     const QString uri = authorUri();
0442     if (!uri.isEmpty()) {
0443         return uri;
0444     }
0445     return {};
0446 }
0447 
0448 QString Article::authorAsHtml() const
0449 {
0450     const QString name = authorName();
0451     const QString email = authorEMail();
0452 
0453     if (!email.isEmpty()) {
0454         if (!name.isEmpty()) {
0455             return QStringLiteral("<a href=\"mailto:%1\">%2</a>").arg(email, name);
0456         } else {
0457             return QStringLiteral("<a href=\"mailto:%1\">%1</a>").arg(email);
0458         }
0459     }
0460 
0461     const QString uri = authorUri();
0462     if (!name.isEmpty()) {
0463         if (!uri.isEmpty()) {
0464             return QStringLiteral("<a href=\"%1\">%2</a>").arg(uri, name);
0465         } else {
0466             return name;
0467         }
0468     }
0469 
0470     if (!uri.isEmpty()) {
0471         return QStringLiteral("<a href=\"%1\">%1</a>").arg(uri);
0472     }
0473     return {};
0474 }
0475 
0476 QUrl Article::link() const
0477 {
0478     return QUrl(d->archive->link(d->guid));
0479 }
0480 
0481 QString Article::description() const
0482 {
0483     return d->archive->description(d->guid);
0484 }
0485 
0486 QString Article::content(ContentOption opt) const
0487 {
0488     const QString cnt = d->archive->content(d->guid);
0489     return opt == ContentAndOnlyContent ? cnt : (!cnt.isEmpty() ? cnt : description());
0490 }
0491 
0492 QString Article::guid() const
0493 {
0494     return d->guid;
0495 }
0496 
0497 bool Article::guidIsPermaLink() const
0498 {
0499     return d->archive->guidIsPermaLink(d->guid);
0500 }
0501 
0502 bool Article::guidIsHash() const
0503 {
0504     return d->archive->guidIsHash(d->guid);
0505 }
0506 
0507 uint Article::hash() const
0508 {
0509     return d->hash;
0510 }
0511 
0512 bool Article::keep() const
0513 {
0514     return (d->status & Private::Keep) != 0;
0515 }
0516 
0517 void Article::setKeep(bool keep)
0518 {
0519     d->status = keep ? (d->status | Private::Keep) : (d->status & ~Private::Keep);
0520     d->archive->setStatus(d->guid, d->status);
0521     if (d->feed) {
0522         d->feed->setArticleChanged(*this);
0523     }
0524 }
0525 
0526 Feed *Article::feed() const
0527 {
0528     return d->feed;
0529 }
0530 
0531 QDateTime Article::pubDate() const
0532 {
0533     return d->pubDate;
0534 }
0535 
0536 QSharedPointer<const Enclosure> Article::enclosure() const
0537 {
0538     if (!d->enclosure) {
0539         QString url;
0540         QString type;
0541         int length;
0542         bool hasEnc;
0543         d->archive->enclosure(d->guid, hasEnc, url, type, length);
0544         if (hasEnc) {
0545             d->enclosure.reset(new EnclosureImpl(url, type, static_cast<uint>(length)));
0546         } else {
0547             d->enclosure.reset(new EnclosureImpl(QString(), QString(), 0));
0548         }
0549     }
0550     return d->enclosure;
0551 }
0552 } // namespace Akregator