core/podcasts/PodcastReader.cpp

0001 /****************************************************************************************
0002  * Copyright (c) 2007 Bart Cerneels <bart.cerneels@kde.org>                             *
0003  *               2009 Mathias Panzenböck <grosser.meister.morti@gmx.net>                *
0004  *               2013 Ralf Engels <ralf-engels@gmx.de>                                  *
0005  *                                                                                      *
0006  * This program is free software; you can redistribute it and/or modify it under        *
0007  * the terms of the GNU General Public License as published by the Free Software        *
0008  * Foundation; either version 2 of the License, or (at your option) any later           *
0009  * version.                                                                             *
0010  *                                                                                      *
0011  * This program is distributed in the hope that it will be useful, but WITHOUT ANY      *
0012  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A      *
0013  * PARTICULAR PURPOSE. See the GNU General Public License for more details.             *
0014  *                                                                                      *
0015  * You should have received a copy of the GNU General Public License along with         *
0016  * this program.  If not, see <http://www.gnu.org/licenses/>.                           *
0017  ****************************************************************************************/
0018
0019 #include "core/podcasts/PodcastReader.h"
0020
0021 #include "core/support/Amarok.h"
0022 #include "core/support/Components.h"
0023 #include "core/support/Debug.h"
0024 #include "core/meta/support/MetaUtility.h"
0025
0026 #include <QUrl>
0027
0028 #include <QDate>
0029 #include <QSet>
0030
0031 #include <algorithm>
0032
0033 using namespace Podcasts;
0034
// XML namespace URIs used when classifying feed elements.
// RSS20_NS is the empty string; the other macros hold the full namespace URI.
0035 #define ITUNES_NS  "http://www.itunes.com/dtds/podcast-1.0.dtd"
0036 #define RDF_NS     "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
0037 #define RSS10_NS   "http://purl.org/rss/1.0/"
0038 #define RSS20_NS   ""
0039 #define ATOM_NS    "http://www.w3.org/2005/Atom"
0040 #define ENC_NS     "http://purl.oclc.org/net/rss_2.0/enc#"
0041 #define CONTENT_NS "http://purl.org/rss/1.0/modules/content"
0042 #define DC_NS      "http://purl.org/dc/elements/1.1/"
0043
0044 // regular expressions for linkification:
// The fragments below are combined by adjacent string-literal concatenation
// into RE_URL and RE_MAIL, which the StaticData constructor embeds in the
// "linkify" pattern.
0045 #define RE_USER   "[-+_%\\.\\w]+"
0046 #define RE_PASSWD RE_USER
0047 #define RE_DOMAIN "[-a-zA-Z0-9]+(?:\\.[-a-zA-Z0-9]+)*"
0048 #define RE_PROT   "[a-zA-Z]+://"
0049 #define RE_URL    RE_PROT "(?:" RE_USER "(?::" RE_PASSWD ")?@)?" RE_DOMAIN \
0050     "(?::\\d+)?(?:/[-\\w\\?&=%+.,;:_#~/!@]*)?"
0051 #define RE_MAIL   RE_USER "@" RE_DOMAIN
0052
// Definition of the static data member; its constructor (StaticData::StaticData)
// builds the regular expressions, the parser actions and the lookup maps.
0053 const PodcastReader::StaticData PodcastReader::sd;
0054
0055 PodcastReader::PodcastReader( PodcastProvider *podcastProvider, QObject *parent )
0056         : QObject( parent )
0057         , m_xmlReader()
0058         , m_podcastProvider( podcastProvider )
0059         , m_transferJob( )
0060         , m_current( nullptr )
0061         , m_actionStack()
0062         , m_contentType( TextContent )
0063         , m_buffer()
0064 {}
0065
0066 void
0067 PodcastReader::Action::begin( PodcastReader *podcastReader ) const
0068 {
0069     if( m_begin )
0070         (( *podcastReader ).*m_begin )();
0071 }
0072
0073 void
0074 PodcastReader::Action::end( PodcastReader *podcastReader ) const
0075 {
0076     if( m_end )
0077         (( *podcastReader ).*m_end )();
0078 }
0079
0080 void
0081 PodcastReader::Action::characters( PodcastReader *podcastReader ) const
0082 {
0083     if( m_characters )
0084         (( *podcastReader ).*m_characters )();
0085 }
0086
0087 // initialization of the feed parser automata:
0088 PodcastReader::StaticData::StaticData()
0089         : removeScripts( QStringLiteral("<script[^<]*</script>|<script[^>]*>"), Qt::CaseInsensitive )
0090         , mightBeHtml( "<\\?xml[^>]*\\?>|<br[^>]*>|<p[^>]*>|&lt;|&gt;|&amp;|&quot;|"
0091                        "<([-:\\w\\d]+)[^>]*(/>|>.*</\\1>)|<hr[>]*>|&#\\d+;|&#x[a-fA-F\\d]+;", Qt::CaseInsensitive )
0092         , linkify( "\\b(" RE_URL ")|\\b(" RE_MAIL ")|(\n)" )
0093
0094         , startAction( rootMap )
0095
0096         , docAction(
0097             docMap,
0098             nullptr,
0099             &PodcastReader::endDocument )
0100         , xmlAction(
0101             xmlMap,
0102             &PodcastReader::beginXml,
0103             &PodcastReader::endXml,
0104             &PodcastReader::readEscapedCharacters )
0105         , skipAction( skipMap )
0106         , noContentAction(
0107             noContentMap,
0108             &PodcastReader::beginNoElement,
0109             nullptr,
0110             &PodcastReader::readNoCharacters )
0111
0112         , rdfAction(
0113             rdfMap,
0114             &PodcastReader::beginRdf )
0115         , rssAction(
0116             rssMap,
0117             &PodcastReader::beginRss )
0118         , feedAction(
0119             feedMap,
0120             &PodcastReader::beginFeed )
0121         , htmlAction(
0122             skipMap,
0123             &PodcastReader::beginHtml )
0124         , unknownFeedTypeAction(
0125             skipMap,
0126             &PodcastReader::beginUnknownFeedType )
0127
0128         // RSS 1.0+2.0
0129         , rss10ChannelAction(
0130             rss10ChannelMap,
0131             &PodcastReader::beginChannel )
0132         , rss20ChannelAction(
0133             rss20ChannelMap,
0134             &PodcastReader::beginChannel )
0135
0136         , titleAction(
0137             textMap,
0138             &PodcastReader::beginText,
0139             &PodcastReader::endTitle,
0140             &PodcastReader::readCharacters )
0141         , subtitleAction(
0142             textMap,
0143             &PodcastReader::beginText,
0144             &PodcastReader::endSubtitle,
0145             &PodcastReader::readCharacters )
0146         , descriptionAction(
0147             textMap,
0148             &PodcastReader::beginText,
0149             &PodcastReader::endDescription,
0150             &PodcastReader::readCharacters )
0151         , encodedAction(
0152             textMap,
0153             &PodcastReader::beginText,
0154             &PodcastReader::endEncoded,
0155             &PodcastReader::readCharacters )
0156         , bodyAction(
0157             xmlMap,
0158             &PodcastReader::beginText,
0159             &PodcastReader::endBody,
0160             &PodcastReader::readEscapedCharacters )
0161         , linkAction(
0162             textMap,
0163             &PodcastReader::beginText,
0164             &PodcastReader::endLink,
0165             &PodcastReader::readCharacters )
0166         , imageAction( imageMap,
0167                        &PodcastReader::beginImage )
0168         , itemAction(
0169             itemMap,
0170             &PodcastReader::beginItem,
0171             &PodcastReader::endItem )
0172         , urlAction(
0173             textMap,
0174             &PodcastReader::beginText,
0175             &PodcastReader::endImageUrl,
0176             &PodcastReader::readCharacters )
0177         , authorAction(
0178             textMap,
0179             &PodcastReader::beginText,
0180             &PodcastReader::endAuthor,
0181             &PodcastReader::readCharacters )
0182         , creatorAction(
0183             textMap,
0184             &PodcastReader::beginText,
0185             &PodcastReader::endCreator,
0186             &PodcastReader::readCharacters )
0187         , enclosureAction(
0188             noContentMap,
0189             &PodcastReader::beginEnclosure )
0190         , guidAction(
0191             textMap,
0192             &PodcastReader::beginText,
0193             &PodcastReader::endGuid,
0194             &PodcastReader::readCharacters )
0195         , pubDateAction(
0196             textMap,
0197             &PodcastReader::beginText,
0198             &PodcastReader::endPubDate,
0199             &PodcastReader::readCharacters )
0200         , keywordsAction(
0201             textMap,
0202             &PodcastReader::beginText,
0203             &PodcastReader::endKeywords,
0204             &PodcastReader::readCharacters )
0205         , newFeedUrlAction(
0206             textMap,
0207             &PodcastReader::beginText,
0208             &PodcastReader::endNewFeedUrl,
0209             &PodcastReader::readCharacters )
0210
0211         // Atom
0212         , atomLogoAction(
0213             textMap,
0214             &PodcastReader::beginText,
0215             &PodcastReader::endImageUrl,
0216             &PodcastReader::readCharacters )
0217         , atomIconAction(
0218             textMap,
0219             &PodcastReader::beginText,
0220             &PodcastReader::endAtomIcon,
0221             &PodcastReader::readCharacters )
0222         , atomEntryAction(
0223             atomEntryMap,
0224             &PodcastReader::beginItem,
0225             &PodcastReader::endItem )
0226         , atomTitleAction(
0227             atomTextMap,
0228             &PodcastReader::beginAtomText,
0229             &PodcastReader::endAtomTitle,
0230             &PodcastReader::readAtomTextCharacters )
0231         , atomSubtitleAction(
0232             atomTextMap,
0233             &PodcastReader::beginAtomText,
0234             &PodcastReader::endAtomSubtitle,
0235             &PodcastReader::readAtomTextCharacters )
0236         , atomAuthorAction(
0237             atomAuthorMap )
0238         , atomFeedLinkAction(
0239             noContentMap,
0240             &PodcastReader::beginAtomFeedLink,
0241             nullptr,
0242             &PodcastReader::readNoCharacters )
0243         , atomEntryLinkAction(
0244             noContentMap,
0245             &PodcastReader::beginAtomEntryLink,
0246             nullptr,
0247             &PodcastReader::readNoCharacters )
0248         , atomIdAction(
0249             textMap,
0250             &PodcastReader::beginText,
0251             &PodcastReader::endGuid,
0252             &PodcastReader::readCharacters )
0253         , atomPublishedAction(
0254             textMap,
0255             &PodcastReader::beginText,
0256             &PodcastReader::endAtomPublished,
0257             &PodcastReader::readCharacters )
0258         , atomUpdatedAction(
0259             textMap,
0260             &PodcastReader::beginText,
0261             &PodcastReader::endAtomUpdated,
0262             &PodcastReader::readCharacters )
0263         , atomSummaryAction(
0264             atomTextMap,
0265             &PodcastReader::beginAtomText,
0266             &PodcastReader::endAtomSummary,
0267             &PodcastReader::readAtomTextCharacters )
0268         , atomContentAction(
0269             atomTextMap,
0270             &PodcastReader::beginAtomText,
0271             &PodcastReader::endAtomContent,
0272             &PodcastReader::readAtomTextCharacters )
0273         , atomTextAction(
0274             atomTextMap,
0275             &PodcastReader::beginAtomTextChild,
0276             &PodcastReader::endAtomTextChild,
0277             &PodcastReader::readAtomTextCharacters )
0278 {
0279     // known elements:
0280     knownElements[ QStringLiteral("rss")          ] = Rss;
0281     knownElements[ QStringLiteral("RDF")          ] = Rdf;
0282     knownElements[ QStringLiteral("feed")         ] = Feed;
0283     knownElements[ QStringLiteral("channel")      ] = Channel;
0284     knownElements[ QStringLiteral("item")         ] = Item;
0285     knownElements[ QStringLiteral("image")        ] = Image;
0286     knownElements[ QStringLiteral("link")         ] = Link;
0287     knownElements[ QStringLiteral("url")          ] = Url;
0288     knownElements[ QStringLiteral("title")        ] = Title;
0289     knownElements[ QStringLiteral("author")       ] = Author;
0290     knownElements[ QStringLiteral("enclosure")    ] = EnclosureElement;
0291     knownElements[ QStringLiteral("guid")         ] = Guid;
0292     knownElements[ QStringLiteral("pubDate")      ] = PubDate;
0293     knownElements[ QStringLiteral("description")  ] = Description;
0294     knownElements[ QStringLiteral("summary")      ] = Summary;
0295     knownElements[ QStringLiteral("body")         ] = Body;
0296     knownElements[ QStringLiteral("entry")        ] = Entry;
0297     knownElements[ QStringLiteral("content")      ] = Content;
0298     knownElements[ QStringLiteral("name")         ] = Name;
0299     knownElements[ QStringLiteral("id")           ] = Id;
0300     knownElements[ QStringLiteral("subtitle")     ] = Subtitle;
0301     knownElements[ QStringLiteral("updated")      ] = Updated;
0302     knownElements[ QStringLiteral("published")    ] = Published;
0303     knownElements[ QStringLiteral("logo")         ] = Logo;
0304     knownElements[ QStringLiteral("icon")         ] = Icon;
0305     knownElements[ QStringLiteral("encoded")      ] = Encoded;
0306     knownElements[ QStringLiteral("creator")      ] = Creator;
0307     knownElements[ QStringLiteral("keywords")     ] = Keywords;
0308     knownElements[ QStringLiteral("new-feed-url") ] = NewFeedUrl;
0309     knownElements[ QStringLiteral("html")         ] = Html;
0310     knownElements[ QStringLiteral("HTML")         ] = Html;
0311
0312     // before start document/after end document
0313     rootMap.insert( Document, &docAction );
0314
0315     // parse document
0316     docMap.insert( Rss, &rssAction );
0317     docMap.insert( Html, &htmlAction );
0318     docMap.insert( Rdf, &rdfAction );
0319     docMap.insert( Feed, &feedAction );
0320     docMap.insert( Any, &unknownFeedTypeAction );
0321
0322     // parse <rss> "RSS 2.0"
0323     rssMap.insert( Channel, &rss20ChannelAction );
0324
0325     // parse <RDF> "RSS 1.0"
0326     rdfMap.insert( Channel, &rss10ChannelAction );
0327     rdfMap.insert( Item, &itemAction );
0328
0329     // parse <channel> "RSS 2.0"
0330     rss20ChannelMap.insert( Title, &titleAction );
0331     rss20ChannelMap.insert( ItunesSubtitle, &subtitleAction );
0332     rss20ChannelMap.insert( ItunesAuthor, &authorAction );
0333     rss20ChannelMap.insert( Creator, &creatorAction );
0334     rss20ChannelMap.insert( Description, &descriptionAction );
0335     rss20ChannelMap.insert( Encoded, &encodedAction );
0336     rss20ChannelMap.insert( ItunesSummary, &descriptionAction );
0337     rss20ChannelMap.insert( Body, &bodyAction );
0338     rss20ChannelMap.insert( Link, &linkAction );
0339     rss20ChannelMap.insert( Image, &imageAction );
0340     rss20ChannelMap.insert( ItunesKeywords, &keywordsAction );
0341     rss20ChannelMap.insert( NewFeedUrl, &newFeedUrlAction );
0342     rss20ChannelMap.insert( Item, &itemAction );
0343
0344     // parse <channel> "RSS 1.0"
0345     rss10ChannelMap.insert( Title, &titleAction );
0346     rss10ChannelMap.insert( ItunesSubtitle, &subtitleAction );
0347     rss10ChannelMap.insert( ItunesAuthor, &authorAction );
0348     rss10ChannelMap.insert( Creator, &creatorAction );
0349     rss10ChannelMap.insert( Description, &descriptionAction );
0350     rss10ChannelMap.insert( Encoded, &encodedAction );
0351     rss10ChannelMap.insert( ItunesSummary, &descriptionAction );
0352     rss10ChannelMap.insert( Body, &bodyAction );
0353     rss10ChannelMap.insert( Link, &linkAction );
0354     rss10ChannelMap.insert( Image, &imageAction );
0355     rss10ChannelMap.insert( ItunesKeywords, &keywordsAction );
0356     rss10ChannelMap.insert( NewFeedUrl, &newFeedUrlAction );
0357
0358     // parse <image>
0359     imageMap.insert( Title, &skipAction );
0360     imageMap.insert( Link, &skipAction );
0361     imageMap.insert( Url, &urlAction );
0362
0363     // parse <item>
0364     itemMap.insert( Title, &titleAction );
0365     itemMap.insert( ItunesSubtitle, &subtitleAction );
0366     itemMap.insert( Author, &authorAction );
0367     itemMap.insert( ItunesAuthor, &authorAction );
0368     itemMap.insert( Creator, &creatorAction );
0369     itemMap.insert( Description, &descriptionAction );
0370     itemMap.insert( Encoded, &encodedAction );
0371     itemMap.insert( ItunesSummary, &descriptionAction );
0372     itemMap.insert( Body, &bodyAction );
0373     itemMap.insert( EnclosureElement, &enclosureAction );
0374     itemMap.insert( Guid, &guidAction );
0375     itemMap.insert( PubDate, &pubDateAction );
0376     itemMap.insert( ItunesKeywords, &keywordsAction );
0377     // TODO: move the link field from PodcastChannel to PodcastMetaCommon
0378     // itemMap.insert( Link, &linkAction );
0379
0380     // parse <feed> "Atom"
0381     feedMap.insert( Title, &atomTitleAction );
0382     feedMap.insert( Subtitle, &atomSubtitleAction );
0383     feedMap.insert( Icon, &atomIconAction );
0384     feedMap.insert( Logo, &atomLogoAction );
0385     feedMap.insert( Author, &atomAuthorAction );
0386     feedMap.insert( Link, &atomFeedLinkAction );
0387     feedMap.insert( Entry, &atomEntryAction );
0388
0389     // parse <entry> "Atom"
0390     atomEntryMap.insert( Title, &atomTitleAction );
0391     atomEntryMap.insert( Subtitle, &atomSubtitleAction );
0392     atomEntryMap.insert( Author, &atomAuthorAction );
0393     atomEntryMap.insert( Id, &atomIdAction );
0394     atomEntryMap.insert( Published, &atomPublishedAction );
0395     atomEntryMap.insert( Updated, &atomUpdatedAction );
0396     atomEntryMap.insert( Summary, &atomSummaryAction );
0397     atomEntryMap.insert( Link, &atomEntryLinkAction );
0398     atomEntryMap.insert( SupportedContent, &atomContentAction );
0399
0400     // parse <author> "Atom"
0401     atomAuthorMap.insert( Name, &authorAction );
0402
0403     // parse atom text
0404     atomTextMap.insert( Any, &atomTextAction );
0405
0406     // parse arbitrary xml
0407     xmlMap.insert( Any, &xmlAction );
0408
0409     // skip elements
0410     skipMap.insert( Any, &skipAction );
0411 }
0412
0413 PodcastReader::~PodcastReader()
0414 {
0415     DEBUG_BLOCK
0416 }
0417
0418 bool
0419 PodcastReader::mightBeHtml( const QString& text ) //Static
0420 {
0421     return sd.mightBeHtml.indexIn( text ) != -1;
0422 }
0423
0424 bool PodcastReader::read( QIODevice *device )
0425 {
0426     DEBUG_BLOCK
0427
0428     m_xmlReader.setDevice( device );
0429     return read();
0430 }
0431
0432 bool
0433 PodcastReader::read( const QUrl &url )
0434 {
0435     DEBUG_BLOCK
0436
0437     m_url = url;
0438
0439     m_transferJob = KIO::get( m_url, KIO::Reload, KIO::HideProgressInfo );
0440
0441     connect( m_transferJob, &KIO::TransferJob::data,
0442              this, &PodcastReader::slotAddData );
0443
0444     connect( m_transferJob, &KIO::TransferJob::result,
0445              this, &PodcastReader::downloadResult );
0446
0447     connect( m_transferJob, &KIO::TransferJob::redirection,
0448              this, &PodcastReader::slotRedirection );
0449
0450     connect( m_transferJob, &KIO::TransferJob::permanentRedirection,
0451              this, &PodcastReader::slotPermanentRedirection );
0452
0453     QString description = i18n( "Importing podcast channel from %1", url.url() );
0454     if( m_channel )
0455     {
0456         description = m_channel->title().isEmpty()
0457                       ? i18n( "Updating podcast channel" )
0458                       : i18n( "Updating \"%1\"", m_channel->title() );
0459     }
0460
0461     Q_EMIT statusBarNewProgressOperation( m_transferJob, description, this );
0462
0463     // parse data
0464     return read();
0465 }
0466
0467 void
0468 PodcastReader::slotAbort()
0469 {
0470     DEBUG_BLOCK
0471 }
0472
0473 bool
0474 PodcastReader::update( const PodcastChannelPtr &channel )
0475 {
0476     DEBUG_BLOCK
0477     m_channel = channel;
0478
0479     return read( m_channel->url() );
0480 }
0481
0482 void
0483 PodcastReader::slotAddData( KIO::Job *job, const QByteArray &data )
0484 {
0485     DEBUG_BLOCK
0486     Q_UNUSED( job )
0487
0488     m_xmlReader.addData( data );
0489
0490     // parse more data
0491     continueRead();
0492 }
0493
0494 void
0495 PodcastReader::downloadResult( KJob * job )
0496 {
0497     DEBUG_BLOCK
0498
0499     // parse more data
0500     continueRead();
0501
0502     KIO::TransferJob *transferJob = dynamic_cast<KIO::TransferJob *>( job );
0503     if( transferJob && transferJob->isErrorPage() )
0504     {
0505         QString errorMessage =
0506             i18n( "Importing podcast from %1 failed with error:\n", m_url.url() );
0507         if( m_channel )
0508         {
0509             errorMessage = m_channel->title().isEmpty()
0510                            ? i18n( "Updating podcast from %1 failed with error:\n", m_url.url() )
0511                            : i18n( "Updating \"%1\" failed with error:\n", m_channel->title() );
0512         }
0513         errorMessage = errorMessage.append( job->errorString() );
0514
0515         Q_EMIT statusBarErrorMessage( errorMessage );
0516     }
0517     else if( job->error() )
0518     {
0519         QString errorMessage =
0520             i18n( "Importing podcast from %1 failed with error:\n", m_url.url() );
0521         if( m_channel )
0522         {
0523             errorMessage = m_channel->title().isEmpty()
0524                            ? i18n( "Updating podcast from %1 failed with error:\n", m_url.url() )
0525                            : i18n( "Updating \"%1\" failed with error:\n", m_channel->title() );
0526         }
0527         errorMessage = errorMessage.append( job->errorString() );
0528
0529         Q_EMIT statusBarErrorMessage( errorMessage );
0530     }
0531
0532     m_transferJob = nullptr;
0533 }
0534
0535 PodcastReader::ElementType
0536 PodcastReader::elementType() const
0537 {
0538     if( m_xmlReader.isEndDocument() || m_xmlReader.isStartDocument() )
0539         return Document;
0540
0541     if( m_xmlReader.isCDATA() || m_xmlReader.isCharacters() )
0542         return CharacterData;
0543
0544     ElementType elementType = sd.knownElements[ m_xmlReader.name().toString()];
0545
0546     // This is a bit hacky because my automata does not support conditions.
0547     // Therefore I put the decision logic in here and declare some pseudo elements.
0548     // I don't think it is worth it to extend the automata to support such conditions.
0549     switch( elementType )
0550     {
0551         case Summary:
0552             if( m_xmlReader.namespaceUri() == ITUNES_NS )
0553             {
0554                 elementType = ItunesSummary;
0555             }
0556             break;
0557
0558         case Subtitle:
0559             if( m_xmlReader.namespaceUri() == ITUNES_NS )
0560             {
0561                 elementType = ItunesSubtitle;
0562             }
0563             break;
0564
0565         case Author:
0566             if( m_xmlReader.namespaceUri() == ITUNES_NS )
0567             {
0568                 elementType = ItunesAuthor;
0569             }
0570             break;
0571
0572         case Keywords:
0573             if( m_xmlReader.namespaceUri() == ITUNES_NS )
0574             {
0575                 elementType = ItunesKeywords;
0576             }
0577             break;
0578
0579         case Content:
0580             if( m_xmlReader.namespaceUri() == ATOM_NS &&
0581                     // ignore atom:content elements that do not
0582                     // have content but only refer to some url:
0583                     !hasAttribute( ATOM_NS, "src" ) )
0584             {
0585                 // Atom supports arbitrary Base64 encoded content.
0586                 // Because we can only something with text/html/xhtml I ignore
0587                 // anything else.
0588                 // See:
0589                 //    http://tools.ietf.org/html/rfc4287#section-4.1.3
0590                 if( hasAttribute( ATOM_NS, "type" ) )
0591                 {
0592                     QStringRef type( attribute( ATOM_NS, "type" ) );
0593
0594                     if( type == "text" || type == "html" || type == "xhtml" )
0595                     {
0596                         elementType = SupportedContent;
0597                     }
0598                 }
0599                 else
0600                 {
0601                     elementType = SupportedContent;
0602                 }
0603             }
0604             break;
0605
0606         default:
0607             break;
0608     }
0609
0610     return elementType;
0611 }
0612
0613 bool
0614 PodcastReader::read()
0615 {
0616     DEBUG_BLOCK
0617
0618     m_current = nullptr;
0619     m_item    = nullptr;
0620     m_contentType = TextContent;
0621     m_buffer.clear();
0622     m_actionStack.clear();
0623     m_actionStack.push( &( PodcastReader::sd.startAction ) );
0624     m_xmlReader.setNamespaceProcessing( true );
0625
0626     return continueRead();
0627 }
0628
0629 bool
0630 PodcastReader::continueRead()
0631 {
0632     // this is some kind of pushdown automata
0633     // with this it should be possible to parse feeds in parallel
0634     // without using threads
0635     DEBUG_BLOCK
0636
0637     while( !m_xmlReader.atEnd() && m_xmlReader.error() != QXmlStreamReader::CustomError )
0638     {
0639         QXmlStreamReader::TokenType token = m_xmlReader.readNext();
0640
0641         if( m_xmlReader.error() == QXmlStreamReader::PrematureEndOfDocumentError && m_transferJob )
0642         {
0643             return true;
0644         }
0645
0646         if( m_xmlReader.hasError() )
0647         {
0648             Q_EMIT finished( this );
0649             return false;
0650         }
0651
0652         if( m_actionStack.isEmpty() )
0653         {
0654             debug() << "expected element on stack!";
0655             return false;
0656         }
0657
0658         const Action* action = m_actionStack.top();
0659         const Action* subAction = nullptr;
0660
0661         switch( token )
0662         {
0663             case QXmlStreamReader::Invalid:
0664                 return false;
0665
0666             case QXmlStreamReader::StartDocument:
0667             case QXmlStreamReader::StartElement:
0668                 subAction = action->actionMap()[ elementType()];
0669
0670                 if( !subAction )
0671                     subAction = action->actionMap()[ Any ];
0672
0673                 if( !subAction )
0674                     subAction = &( PodcastReader::sd.skipAction );
0675
0676                 m_actionStack.push( subAction );
0677
0678                 subAction->begin( this );
0679                 break;
0680
0681             case QXmlStreamReader::EndDocument:
0682             case QXmlStreamReader::EndElement:
0683                 action->end( this );
0684
0685                 if( m_actionStack.pop() != action )
0686                 {
0687                     debug() << "popped other element than expected!";
0688                 }
0689                 break;
0690
0691             case QXmlStreamReader::Characters:
0692                 if( !m_xmlReader.isWhitespace() || m_xmlReader.isCDATA() )
0693                 {
0694                     action->characters( this );
0695                 }
0696             break;
0697                 // ignorable whitespaces
0698             case QXmlStreamReader::Comment:
0699             case QXmlStreamReader::EntityReference:
0700             case QXmlStreamReader::ProcessingInstruction:
0701             case QXmlStreamReader::DTD:
0702             case QXmlStreamReader::NoToken:
0703                 // ignore
0704                 break;
0705         }
0706     }
0707
0708     return !m_xmlReader.hasError();
0709 }
0710
0711 void
0712 PodcastReader::stopWithError( const QString &message )
0713 {
0714     m_xmlReader.raiseError( message );
0715
0716     if( m_transferJob )
0717     {
0718         m_transferJob->kill(KJob::EmitResult);
0719         m_transferJob = nullptr;
0720     }
0721
0722     Q_EMIT finished( this );
0723 }
0724
0725 void
0726 PodcastReader::beginText()
0727 {
0728     m_buffer.clear();
0729 }
0730
0731 void
0732 PodcastReader::endTitle()
0733 {
0734     m_current->setTitle( m_buffer.trimmed() );
0735 }
0736
0737 void
0738 PodcastReader::endSubtitle()
0739 {
0740     m_current->setSubtitle( m_buffer.trimmed() );
0741 }
0742
0743 QString
0744 PodcastReader::atomTextAsText()
0745 {
0746     switch( m_contentType )
0747     {
0748         case HtmlContent:
0749         case XHtmlContent:
0750             // TODO: strip tags (there should not be any non-xml entities here)
0751             return unescape( m_buffer );
0752
0753         case TextContent:
0754         default:
0755             return m_buffer;
0756     }
0757 }
0758
0759 QString
0760 PodcastReader::atomTextAsHtml()
0761 {
0762     switch( m_contentType )
0763     {
0764         case HtmlContent:
0765         case XHtmlContent:
0766             // strip <script> elements
0767             // This will work because there aren't <![CDATA[ ]]> sections
0768             // in m_buffer, because we have (re)escape the code manually.
0769             // XXX: But it does not remove event handlers like onclick="..."
0770             // and JavaScript links like href="javascript:..."
0771             return m_buffer.remove( sd.removeScripts );
0772
0773         case TextContent:
0774         default:
0775             return textToHtml( m_buffer );
0776     }
0777 }
0778
0779 QString
0780 PodcastReader::unescape( const QString &text )
0781 {
0782     // TODO: resolve predefined html entities
0783     QString buf;
0784
0785     for ( int i = 0; i < text.size(); ++ i )
0786     {
0787         QChar c( text[ i ] );
0788
0789         if( c == '&' )
0790         {
0791             int endIndex = text.indexOf( QLatin1Char(';'), i );
0792
0793             if( endIndex == -1 )
0794             {
0795                 // fix invalid input
0796                 buf += c;
0797             }
0798             else if( text[ i + 1 ] == '#' )
0799             {
0800                 int num = 0;
0801                 bool ok = false;
0802                 if( text[ i + 2 ] == 'x' )
0803                 {
0804                     QString entity( text.mid( i + 3, endIndex - i - 3 ) );
0805                     num = entity.toInt( &ok, 16 );
0806                 }
0807                 else
0808                 {
0809                     QString entity( text.mid( i + 2, endIndex - i - 2 ) );
0810                     num = entity.toInt( &ok, 10 );
0811                 }
0812
0813                 if( !ok || num < 0 )
0814                 {
0815                     // fix invalid input
0816                     buf += c;
0817                 }
0818                 else
0819                 {
0820                     buf += QChar( num );
0821                     i = endIndex;
0822                 }
0823             }
0824             else
0825             {
0826                 QString entity( text.mid( i + 1, endIndex - i - 1 ) );
0827
0828                 if( entity == QLatin1String("lt") )
0829                 {
0830                     buf += QLatin1Char('<');
0831                     i = endIndex;
0832                 }
0833                 else if( entity == QLatin1String("gt") )
0834                 {
0835                     buf += QLatin1Char('>');
0836                     i = endIndex;
0837                 }
0838                 else if( entity == QLatin1String("amp") )
0839                 {
0840                     buf += QLatin1Char('&');
0841                     i = endIndex;
0842                 }
0843                 else if( entity == QLatin1String("apos") )
0844                 {
0845                     buf += QLatin1Char('\'');
0846                     i = endIndex;
0847                 }
0848                 else if( entity == QLatin1String("quot") )
0849                 {
0850                     buf += QLatin1Char('"');
0851                     i = endIndex;
0852                 }
0853                 else
0854                 {
0855                     // fix invalid input
0856                     buf += c;
0857                 }
0858             }
0859         }
0860         else
0861         {
0862             buf += c;
0863         }
0864     }
0865
0866     return buf;
0867 }
0868
0869 void
0870 PodcastReader::setSummary( const QString &description )
0871 {
0872     if( m_current->summary().size() < description.size() )
0873     {
0874         m_current->setSummary( description );
0875     }
0876 }
0877
0878 void
0879 PodcastReader::setDescription( const QString &description )
0880 {
0881     // The content of the <description>, <itunes:summary> or <body>
0882     // elements might be assigned to the field description, unless
0883     // there is already longer data in it. Then it will be assigned
0884     // to summary, unless summary depending on whether there
0885     // already is some (longer) information in the description
0886     // field.
0887     // If there is already data in the description field, instead of
0888     // overwriting, it will be moved to the summary field, unless
0889     // there is already longer data there.
0890     if( m_current->description().size() < description.size() )
0891     {
0892         setSummary( m_current->description() );
0893         m_current->setDescription( description );
0894     }
0895     else
0896     {
0897         setSummary( description );
0898     }
0899 }
0900
0901 void
0902 PodcastReader::endDescription()
0903 {
0904     QString description( m_buffer.trimmed() );
0905
0906     if( !mightBeHtml( description ) )
0907     {
0908         // content type is plain text
0909         description = textToHtml( description );
0910     }
0911     // else: content type is html
0912     setDescription( description );
0913 }
0914
0915 QString
0916 PodcastReader::textToHtml( const QString &text )
0917 {
0918     QString buf;
0919     QRegExp re( sd.linkify );
0920     int index = 0;
0921
0922     for(;;)
0923     {
0924         int next = re.indexIn( text, index );
0925
0926         if( next == -1 )
0927             break;
0928
0929         if( next != index )
0930         {
0931             buf += text.mid( index, next - index ).toHtmlEscaped();
0932         }
0933
0934         QString s;
0935
0936         if( !(s = re.cap( 1 )).isEmpty() )
0937         {
0938             if( s.startsWith( QLatin1String( "javascript:" ), Qt::CaseInsensitive ) ||
0939                 s.startsWith( QLatin1String( "exec:" ), Qt::CaseInsensitive ) )
0940             {
0941                 buf += s.toHtmlEscaped();
0942             }
0943             else
0944             {
0945                 buf += QStringLiteral( "<a href=\"%1\">%1</a>" )
0946                     .arg( s.toHtmlEscaped() );
0947             }
0948         }
0949         else if( !(s = re.cap( 2 )).isEmpty() )
0950         {
0951             buf += QStringLiteral( "<a href=\"mailto:%1\">%1</a>" )
0952                 .arg( s.toHtmlEscaped() );
0953         }
0954         else if( !re.cap( 3 ).isEmpty() )
0955         {
0956             buf += QLatin1String("<br/>\n");
0957         }
0958
0959         index = re.pos() + re.matchedLength();
0960     }
0961
0962     buf += text.mid( index ).toHtmlEscaped();
0963
0964     return buf;
0965 }
0966
0967 void
0968 PodcastReader::endEncoded()
0969 {
0970     // content type is html
0971     setDescription( m_buffer.trimmed() );
0972 }
0973
0974 void
0975 PodcastReader::endBody()
0976 {
0977     // content type is xhtml
0978     // always prefer <body>, because it's likely to
0979     // contain nice html formatted information
0980     setSummary( m_current->description() );
0981     m_current->setDescription( m_buffer.trimmed() );
0982 }
0983
0984 void
0985 PodcastReader::endLink()
0986 {
0987     // TODO: change to m_current->... when the field
0988     //       is moved to the PodcastMetaCommon class.
0989     m_channel->setWebLink( QUrl( m_buffer ) );
0990 }
0991
0992 void
0993 PodcastReader::beginHtml()
0994 {
0995     stopWithError( i18n( "While parsing %1, a feed was expected but an HTML page was received."
0996                          "\nDid you enter the correct URL?", m_url.url() ) );
0997 }
0998
0999 void
1000 PodcastReader::beginUnknownFeedType()
1001 {
1002     stopWithError( i18n( "Feed has an unknown type: %1", m_url.url() ) );
1003 }
1004
1005 void
1006 PodcastReader::beginRss()
1007 {
1008     if( m_xmlReader.attributes().value( QStringLiteral("version") ) != "2.0" )
1009     {
1010         // TODO: change this string once we support more
1011         stopWithError( i18n( "%1 is not an RSS version 2.0 feed.", m_url.url() ) );
1012     }
1013 }
1014
1015 void
1016 PodcastReader::beginRdf()
1017 {
1018     bool ok = true;
1019     if( m_xmlReader.namespaceUri() != RDF_NS )
1020     {
1021         ok = false;
1022     }
1023
1024     if( ok )
1025     {
1026         bool found = false;
1027         foreach( const QXmlStreamNamespaceDeclaration &nsdecl, m_xmlReader.namespaceDeclarations() )
1028         {
1029             if( nsdecl.namespaceUri() == RSS10_NS )
1030             {
1031                 found = true;
1032                 break;
1033             }
1034         }
1035
1036         if( !found )
1037             ok = false;
1038     }
1039
1040     if( !ok )
1041         stopWithError( i18n( "%1 is not a valid RSS version 1.0 feed.", m_url.url() ) );
1042 }
1043
1044 void
1045 PodcastReader::beginFeed()
1046 {
1047     if( m_xmlReader.namespaceUri() != ATOM_NS )
1048     {
1049         stopWithError( i18n( "%1 is not a valid Atom feed.", m_url.url() ) );
1050     }
1051     else
1052     {
1053         beginChannel();
1054     }
1055 }
1056
1057 void
1058 PodcastReader::endDocument()
1059 {
1060     debug() << "successfully parsed feed: " << m_url.url();
1061     Q_EMIT finished( this );
1062 }
1063
1064 void
1065 PodcastReader::createChannel()
1066 {
1067     if( !m_channel )
1068     {
1069         debug() << "new channel";
1070
1071         Podcasts::PodcastChannelPtr channel( new Podcasts::PodcastChannel() );
1072         channel->setUrl( m_url );
1073         channel->setSubscribeDate( QDate::currentDate() );
1074         /* add this new channel to the provider, we get a pointer to a
1075          * PodcastChannelPtr of the correct type which we will use from now on.
1076          */
1077         m_channel = m_podcastProvider->addChannel( channel );
1078     }
1079 }
1080
1081 void
1082 PodcastReader::beginChannel()
1083 {
1084     createChannel();
1085
1086     m_current = m_channel.data();
1087
1088     // Because the summary and description fields are read from several elements
1089     // they only get changed when longer information is read as there is stored in
1090     // the appropriate field already. In order to still be able to correctly update
1091     // the feed's description/summary I set it here to the empty string:
1092     m_channel->setDescription( QLatin1String("") );
1093     m_channel->setSummary( QLatin1String("") );
1094     m_channel->setKeywords( QStringList() );
1095 }
1096
1097 void
1098 PodcastReader::beginItem()
1099 {
1100     // theoretically it is possible that an ugly RSS 1.0 feed has
1101     // first the <item> elements followed by the <channel> element:
1102     createChannel();
1103
1104     m_item = new Podcasts::PodcastEpisode( m_channel );
1105     m_current = m_item.data();
1106
1107     m_enclosures.clear();
1108 }
1109
1110 void
1111 PodcastReader::endItem()
1112 {
1113     // TODO: change superclass of PodcastEpisode to MultiTrack
1114
1115     /*  some feeds contain normal blogposts without
1116         enclosures alongside of podcasts */
1117
1118     if( !m_enclosures.isEmpty() )
1119     {
1120         // just take the first enclosure on multi
1121         m_item->setUidUrl( m_enclosures[ 0 ].url() );
1122         m_item->setFilesize( m_enclosures[ 0 ].fileSize() );
1123         m_item->setMimeType( m_enclosures[ 0 ].mimeType() );
1124
1125         m_enclosures.removeAt( 0 );
1126
1127         // append alternative enclosures to description
1128         if( !m_enclosures.isEmpty() )
1129         {
1130             QString description( m_item->description() );
1131             description += QLatin1String("\n<p><b>");
1132             description += i18n( "Alternative Enclosures:" );
1133             description += QLatin1String("</b><br/>\n<ul>");
1134
1135             foreach( const Enclosure& enclosure, m_enclosures )
1136             {
1137                 description += QStringLiteral( "<li><a href=\"%1\">%2</a> (%3, %4)</li>" )
1138                                .arg( enclosure.url().url().toHtmlEscaped(),
1139                                      enclosure.url().fileName().toHtmlEscaped(),
1140                                      Meta::prettyFilesize( enclosure.fileSize() ),
1141                                      enclosure.mimeType().isEmpty() ?
1142                                      i18n( "unknown type" ) :
1143                                      enclosure.mimeType().toHtmlEscaped() );
1144             }
1145
1146             description += QLatin1String("</ul></p>");
1147             m_item->setDescription( description );
1148         }
1149
1150         Podcasts::PodcastEpisodePtr episode;
1151         QString guid = m_item->guid();
1152         if( guid.isEmpty() )
1153         {
1154              episode = Podcasts::PodcastEpisodePtr::dynamicCast(
1155                                               m_podcastProvider->trackForUrl( QUrl::fromUserInput(m_item->uidUrl()) )
1156                                           );
1157         }
1158         else
1159         {
1160             episode = m_podcastProvider->episodeForGuid( guid );
1161         }
1162
1163         //make sure that the episode is not a bogus match. The channel has to be correct.
1164         // See https://bugs.kde.org/show_bug.cgi?id=227515
1165         if( !episode.isNull() && episode->channel() == m_channel )
1166         {
1167             debug() << "updating episode: " << episode->title();
1168
1169             episode->setTitle( m_item->title() );
1170             episode->setSubtitle( m_item->subtitle() );
1171             episode->setSummary( m_item->summary() );
1172             episode->setDescription( m_item->description() );
1173             episode->setAuthor( m_item->author() );
1174             episode->setUidUrl( QUrl::fromUserInput(m_item->uidUrl()) );
1175             episode->setFilesize( m_item->filesize() );
1176             episode->setMimeType( m_item->mimeType() );
1177             episode->setPubDate( m_item->pubDate() );
1178             episode->setKeywords( m_item->keywords() );
1179
1180             // set the guid in case it was empty (for some buggy reason):
1181             episode->setGuid( m_item->guid() );
1182         }
1183         else
1184         {
1185             debug() << "new episode: " << m_item->title();
1186
1187             episode = m_channel->addEpisode( m_item );
1188             // also let the provider know an episode has been added
1189             // TODO: change into a signal
1190             m_podcastProvider->addEpisode( episode );
1191         }
1192     }
1193
1194     m_current = m_channel.data();
1195     m_item = nullptr;
1196 }
1197
1198 void
1199 PodcastReader::beginEnclosure()
1200 {
1201     // This should read both, RSS 2.0 and RSS 1.0 with mod_enclosure
1202     // <enclosure> elements.
1203     // See:
1204     //    http://www.rssboard.org/rss-specification
1205     //    http://www.xs4all.nl/~foz/mod_enclosure.html
1206     QStringRef str;
1207
1208     str = m_xmlReader.attributes().value( QStringLiteral("url") );
1209
1210     if( str.isEmpty() )
1211         str = attribute( RDF_NS, "about" );
1212
1213     if( str.isEmpty() )
1214     {
1215         debug() << "invalid enclosure containing no/empty url";
1216         return;
1217     }
1218
1219     QUrl url( str.toString() );
1220
1221     str = m_xmlReader.attributes().value( QStringLiteral("length") );
1222
1223     if( str.isEmpty() )
1224         str = attribute( ENC_NS, "length" );
1225
1226     int length = str.toString().toInt();
1227
1228     str = m_xmlReader.attributes().value( QStringLiteral("type") );
1229
1230     if( str.isEmpty() )
1231         str = attribute( ENC_NS, "type" );
1232
1233     QString mimeType( str.toString().trimmed() );
1234
1235     m_enclosures.append( Enclosure( url, length, mimeType ) );
1236 }
1237
1238 void
1239 PodcastReader::endGuid()
1240 {
1241     m_item->setGuid( m_buffer );
1242 }
1243
1244 void
1245 PodcastReader::endPubDate()
1246 {
1247     QDateTime pubDate( parsePubDate( m_buffer ) );
1248
1249     if( !pubDate.isValid() )
1250     {
1251         debug() << "invalid podcast episode pubDate: " << m_buffer;
1252         return;
1253     }
1254
1255     m_item->setPubDate( pubDate );
1256 }
1257
1258 void
1259 PodcastReader::beginImage()
1260 {
1261     if( m_xmlReader.namespaceUri() == ITUNES_NS )
1262     {
1263         m_channel->setImageUrl( QUrl( m_xmlReader.attributes().value( QStringLiteral("href") ).toString() ) );
1264     }
1265 }
1266
1267 void
1268 PodcastReader::endImageUrl()
1269 {
1270     // TODO save image data
1271     m_channel->setImageUrl( QUrl( m_buffer ) );
1272 }
1273
1274 void
1275 PodcastReader::endKeywords()
1276 {
1277     QList<QString> keywords( m_current->keywords() );
1278
1279     foreach( const QString &keyword, m_buffer.split( QLatin1Char(',') ) )
1280     {
1281         QString kwd( keyword.simplified() );
1282         if( !kwd.isEmpty() && !keywords.contains( kwd ) )
1283             keywords.append( kwd );
1284     }
1285
1286     std::sort( keywords.begin(), keywords.end() );
1287     m_current->setKeywords( keywords );
1288
1289 }
1290
1291 void
1292 PodcastReader::endNewFeedUrl()
1293 {
1294     if( m_xmlReader.namespaceUri() == ITUNES_NS )
1295     {
1296         m_url = QUrl( m_buffer.trimmed() );
1297
1298         if( m_channel && m_channel->url() != m_url )
1299         {
1300             debug() << "feed url changed to: " << m_url.url();
1301             m_channel->setUrl( m_url );
1302         }
1303     }
1304 }
1305
1306 void
1307 PodcastReader::endAuthor()
1308 {
1309     m_current->setAuthor( m_buffer.trimmed() );
1310 }
1311
1312 void
1313 PodcastReader::endCreator()
1314 {
1315     // there are funny people that do not use <author> but <dc:creator>
1316     if( m_xmlReader.namespaceUri() == DC_NS )
1317     {
1318         endAuthor();
1319     }
1320 }
1321
1322 void
1323 PodcastReader::beginXml()
1324 {
1325     m_buffer += '<';
1326     m_buffer += m_xmlReader.name().toString();
1327
1328     foreach( const QXmlStreamAttribute &attr, m_xmlReader.attributes() )
1329     {
1330         m_buffer += QStringLiteral( " %1=\"%2\"" )
1331                     .arg( attr.name().toString(),
1332                           attr.value().toString().toHtmlEscaped() );
1333     }
1334
1335     m_buffer += '>';
1336 }
1337
1338 void
1339 PodcastReader::beginNoElement()
1340 {
1341     DEBUG_BLOCK
1342     debug() << "no element expected here, but got element: "
1343     << m_xmlReader.name();
1344 }
1345
1346 void
1347 PodcastReader::beginAtomText()
1348 {
1349     if( hasAttribute( ATOM_NS, "type" ) )
1350     {
1351         QStringRef type( attribute( ATOM_NS, "type" ) );
1352
1353         if( type == "text" )
1354         {
1355             m_contentType = TextContent;
1356         }
1357         else if( type == "html" )
1358         {
1359             m_contentType = HtmlContent;
1360         }
1361         else if( type == "xhtml" )
1362         {
1363             m_contentType = XHtmlContent;
1364         }
1365         else
1366         {
1367             // this should not happen, see elementType()
1368             debug() << "unsupported atom:content type: " << type.toString();
1369             m_contentType = TextContent;
1370         }
1371     }
1372     else
1373     {
1374         m_contentType = TextContent;
1375     }
1376
1377     m_buffer.clear();
1378 }
1379
1380 void
1381 PodcastReader::beginAtomTextChild()
1382 {
1383     switch( m_contentType )
1384     {
1385         case XHtmlContent:
1386             beginXml();
1387             break;
1388
1389         case HtmlContent:
1390         case TextContent:
1391             // stripping illegal tags
1392             debug() << "read unexpected open tag in atom text: " << m_xmlReader.name();
1393
1394         default:
1395             break;
1396     }
1397 }
1398
1399 void
1400 PodcastReader::endAtomTextChild()
1401 {
1402     switch( m_contentType )
1403     {
1404         case XHtmlContent:
1405             endXml();
1406             break;
1407
1408         case HtmlContent:
1409         case TextContent:
1410             // stripping illegal tags
1411             debug() << "read unexpected close tag in atom text: " << m_xmlReader.name();
1412
1413         default:
1414             break;
1415     }
1416 }
1417
1418 void
1419 PodcastReader::readAtomTextCharacters()
1420 {
1421     switch( m_contentType )
1422     {
1423     case XHtmlContent:
1424         m_buffer += m_xmlReader.text().toString().toHtmlEscaped();
1425         break;
1426
1427     case HtmlContent:
1428         m_buffer += m_xmlReader.text();
1429         break;
1430
1431     case TextContent:
1432         m_buffer += m_xmlReader.text();
1433
1434     default:
1435         break;
1436     }
1437 }
1438
1439 void
1440 PodcastReader::beginAtomFeedLink()
1441 {
1442     if( !hasAttribute( ATOM_NS, "rel" ) ||
1443             attribute( ATOM_NS, "rel" ) == "alternate" )
1444     {
1445         m_channel->setWebLink( QUrl( attribute( ATOM_NS, "href" ).toString() ) );
1446     }
1447     else if( attribute( ATOM_NS, "rel" ) == "self" )
1448     {
1449         m_url = QUrl( attribute( ATOM_NS, "href" ).toString() );
1450
1451         if( m_channel && m_channel->url() != m_url )
1452         {
1453             debug() << "feed url changed to: " << m_url.url();
1454             m_channel->setUrl( m_url );
1455         }
1456     }
1457 }
1458
1459 void
1460 PodcastReader::beginAtomEntryLink()
1461 {
1462     if( attribute( ATOM_NS, "rel" ) == "enclosure" )
1463     {
1464         QUrl url( attribute( ATOM_NS, "href" ).toString() );
1465         int filesize = 0;
1466         QString mimeType;
1467
1468         if( hasAttribute( ATOM_NS, "length" ) )
1469         {
1470             filesize = attribute( ATOM_NS, "length" ).toString().toInt();
1471         }
1472
1473         if( hasAttribute( ATOM_NS, "type" ) )
1474         {
1475             mimeType = attribute( ATOM_NS, "type" ).toString();
1476         }
1477
1478         m_enclosures.append( Enclosure( url, filesize, mimeType ) );
1479     }
1480 }
1481
1482 void
1483 PodcastReader::endAtomIcon()
1484 {
1485     if( !m_channel->hasImage() )
1486     {
1487         endImageUrl();
1488     }
1489 }
1490
1491 void
1492 PodcastReader::endAtomTitle()
1493 {
1494     // TODO: don't convert text but store m_contentType
1495     m_current->setTitle( atomTextAsText().trimmed() );
1496 }
1497
1498 void
1499 PodcastReader::endAtomSubtitle()
1500 {
1501     // TODO: don't convert text but store m_contentType
1502     m_current->setSubtitle( atomTextAsText().trimmed() );
1503 }
1504
1505 void
1506 PodcastReader::endAtomSummary()
1507 {
1508     // TODO: don't convert text but store m_contentType
1509     m_current->setSummary( atomTextAsHtml().trimmed() );
1510 }
1511
1512 void
1513 PodcastReader::endAtomContent()
1514 {
1515     // TODO: don't convert text but store m_contentType
1516     m_current->setDescription( atomTextAsHtml() );
1517 }
1518
1519 void
1520 PodcastReader::endAtomPublished()
1521 {
1522     QDateTime date = QDateTime::fromString( m_buffer, Qt::ISODate );
1523
1524     if( !date.isValid() )
1525     {
1526         debug() << "invalid podcast episode atom:published date: " << m_buffer;
1527         return;
1528     }
1529
1530     if( !m_item->pubDate().isValid() || m_item->pubDate() < date )
1531     {
1532         m_item->setPubDate( date );
1533     }
1534 }
1535
1536 void
1537 PodcastReader::endAtomUpdated()
1538 {
1539     QDateTime date = QDateTime::fromString( m_buffer, Qt::ISODate );
1540
1541     if( !date.isValid() )
1542     {
1543         debug() << "invalid podcast episode atom:updated date: " << m_buffer;
1544         return;
1545     }
1546
1547     if( !m_item->pubDate().isValid() || m_item->pubDate() < date )
1548     {
1549         // TODO: add field updatedDate and use this (throughout amarok)
1550         m_item->setPubDate( date );
1551     }
1552 }
1553
1554 void
1555 PodcastReader::readNoCharacters()
1556 {
1557     DEBUG_BLOCK
1558     debug() << "no characters expected here";
1559 }
1560
1561 void
1562 PodcastReader::endXml()
1563 {
1564     m_buffer += QLatin1String("</");
1565     m_buffer += m_xmlReader.name().toString();
1566     m_buffer += '>';
1567 }
1568
1569 void
1570 PodcastReader::readCharacters()
1571 {
1572     m_buffer += m_xmlReader.text();
1573 }
1574
1575 void
1576 PodcastReader::readEscapedCharacters()
1577 {
1578     m_buffer += m_xmlReader.text().toString().toHtmlEscaped() ;
1579 }
1580
1581 QStringRef
1582 PodcastReader::attribute( const char *namespaceUri, const char *name ) const
1583 {
1584     // workaround, because Qt seems to have a bug:
1585     // when the default namespace is used attributes
1586     // aren't inside this namespace for some reason
1587     if( m_xmlReader.attributes().hasAttribute( namespaceUri, name ) )
1588         return m_xmlReader.attributes().value( namespaceUri, name );
1589     else
1590         return m_xmlReader.attributes().value( QString(), name );
1591 }
1592
1593 bool
1594 PodcastReader::hasAttribute( const char *namespaceUri, const char *name ) const
1595 {
1596     // see PodcastReader::attribute()
1597     if( m_xmlReader.attributes().hasAttribute( namespaceUri, name ) )
1598         return true;
1599     else
1600         return m_xmlReader.attributes().hasAttribute( QString(), name );
1601 }
1602
1603 QDateTime
1604 PodcastReader::parsePubDate( const QString &dateString )
1605 {
1606     DEBUG_BLOCK
1607     QString parseInput = dateString;
1608     debug() << "Parsing pubdate: " << parseInput;
1609
1610     QRegExp rfcDateDayRegex( QStringLiteral("^[A-Z]{1}[a-z]{2}\\s*,\\s*(.*)") );
1611     if( rfcDateDayRegex.indexIn( parseInput ) != -1 )
1612     {
1613         parseInput = rfcDateDayRegex.cap(1);
1614     }
1615     //Hack around a to strict RFCDate implementation in KDateTime.
1616     //See https://bugs.kde.org/show_bug.cgi?id=231062
1617     QRegExp rfcMonthLowercase( QStringLiteral("^\\d+\\s+\\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\\b") );
1618     if( rfcMonthLowercase.indexIn( parseInput ) != -1 )
1619     {
1620         QString lowerMonth = rfcMonthLowercase.cap( 1 );
1621         QString upperMonth = lowerMonth;
1622         upperMonth.replace( 0, 1, lowerMonth.at( 0 ).toUpper() );
1623         parseInput.replace( lowerMonth, upperMonth );
1624     }
1625
1626     QDateTime pubDate = QDateTime::fromString( parseInput, Qt::RFC2822Date );
1627
1628     debug() << "result: " << pubDate.toString();
1629     return pubDate;
1630 }
1631
1632 void
1633 PodcastReader::slotRedirection( KIO::Job * job, const QUrl &url )
1634 {
1635     DEBUG_BLOCK
1636     Q_UNUSED( job );
1637     debug() << "redirected to: " << url.url();
1638 }
1639
1640 void
1641 PodcastReader::slotPermanentRedirection( KIO::Job * job, const QUrl &fromUrl,
1642         const QUrl &toUrl )
1643 {
1644     DEBUG_BLOCK
1645     Q_UNUSED( job );
1646     Q_UNUSED( fromUrl );
1647     debug() << "permanently redirected to: " << toUrl.url();
1648     m_url = toUrl;
1649     /* change the url for existing feeds as well. Permanent redirection means the old one
1650     might disappear soon. */
1651     if( m_channel )
1652         m_channel->setUrl( m_url );
1653 }
1654
1655 Podcasts::PodcastEpisodePtr
1656 PodcastReader::podcastEpisodeCheck( Podcasts::PodcastEpisodePtr episode )
1657 {
1658 //     DEBUG_BLOCK
1659     Podcasts::PodcastEpisodePtr episodeMatch = episode;
1660     Podcasts::PodcastEpisodeList episodes = m_channel->episodes();
1661
1662 //     debug() << "episode title: " << episode->title();
1663 //     debug() << "episode url: " << episode->prettyUrl();
1664 //     debug() << "episode guid: " << episode->guid();
1665
1666     foreach( PodcastEpisodePtr match, episodes )
1667     {
1668 //         debug() << "match title: " << match->title();
1669 //         debug() << "match url: " << match->prettyUrl();
1670 //         debug() << "match guid: " << match->guid();
1671
1672         int score = 0;
1673         if( !episode->title().isEmpty() && episode->title() == match->title() )
1674             score += 1;
1675         if( !episode->prettyUrl().isEmpty() && episode->prettyUrl() == match->prettyUrl() )
1676             score += 3;
1677         if( !episode->guid().isEmpty() && episode->guid() == match->guid() )
1678             score += 3;
1679
1680 //         debug() << "score: " << score;
1681         if( score >= 3 )
1682         {
1683             episodeMatch = match;
1684             break;
1685         }
1686     }
1687
1688     return episodeMatch;
1689 }
1690