File indexing completed on 2024-04-21 04:47:52

0001 /****************************************************************************************
0002  * Copyright (c) 2010 Bart Cerneels <bart.cerneels@kde.org>                             *
0003  *               2009 Mathias Panzenböck <grosser.meister.morti@gmx.net>                *
0004  *                                                                                      *
0005  * This program is free software; you can redistribute it and/or modify it under        *
0006  * the terms of the GNU General Public License as published by the Free Software        *
0007  * Foundation; either version 2 of the License, or (at your option) any later           *
0008  * version.                                                                             *
0009  *                                                                                      *
0010  * This program is distributed in the hope that it will be useful, but WITHOUT ANY      *
0011  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A      *
0012  * PARTICULAR PURPOSE. See the GNU General Public License for more details.             *
0013  *                                                                                      *
0014  * You should have received a copy of the GNU General Public License along with         *
0015  * this program.  If not, see <http://www.gnu.org/licenses/>.                           *
0016  ****************************************************************************************/
0017 
0018 #include "OpmlParser.h"
0019 
0020 #include "core/support/Amarok.h"
0021 #include "core/support/Debug.h"
0022 
0023 #include <QFile>
0024 #include <QXmlStreamReader>
0025 
0026 #include <KLocalizedString>
0027 #include <kio/job.h>
0028 
0029 const QString OpmlParser::OPML_MIME = "text/x-opml+xml";
0030 
0031 const OpmlParser::StaticData OpmlParser::sd;
0032 
0033 OpmlParser::OpmlParser( const QUrl &url )
0034         : QObject()
0035         , ThreadWeaver::Job()
0036         , QXmlStreamReader()
0037         , m_url( url )
0038 {
0039 }
0040 
0041 OpmlParser::~OpmlParser()
0042 {
0043 }
0044 
0045 void
0046 OpmlParser::run(ThreadWeaver::JobPointer self, ThreadWeaver::Thread *thread)
0047 {
0048     Q_UNUSED(self);
0049     Q_UNUSED(thread);
0050     read( m_url );
0051 }
0052 
0053 void
0054 OpmlParser::defaultBegin(const ThreadWeaver::JobPointer& self, ThreadWeaver::Thread *thread)
0055 {
0056     Q_EMIT started(self);
0057     ThreadWeaver::Job::defaultBegin(self, thread);
0058 }
0059 
0060 void
0061 OpmlParser::defaultEnd(const ThreadWeaver::JobPointer& self, ThreadWeaver::Thread *thread)
0062 {
0063     ThreadWeaver::Job::defaultEnd(self, thread);
0064     if (!self->success()) {
0065         Q_EMIT failed(self);
0066     }
0067     Q_EMIT done(self);
0068 }
0069 
0070 bool
0071 OpmlParser::read( const QUrl &url )
0072 {
0073     m_url = url;
0074     if( m_url.isLocalFile() )
0075     {
0076         //read directly from local file
0077         QFile localFile( m_url.toLocalFile() );
0078         if( !localFile.open( QIODevice::ReadOnly ) )
0079         {
0080             debug() << "failed to open local OPML file " << m_url.url();
0081             return false;
0082         }
0083 
0084         return read( &localFile );
0085     }
0086 
0087     m_transferJob = KIO::get( m_url, KIO::Reload, KIO::HideProgressInfo );
0088 
0089     connect( m_transferJob, &KIO::TransferJob::data,
0090              this, &OpmlParser::slotAddData );
0091 
0092     connect( m_transferJob, &KIO::TransferJob::result,
0093              this, &OpmlParser::downloadResult );
0094 
0095     // parse data
0096     return read();
0097 }
0098 
0099 bool
0100 OpmlParser::read( QIODevice *device )
0101 {
0102     setDevice( device );
0103     return read();
0104 }
0105 
0106 void
0107 OpmlParser::slotAddData( KIO::Job *job, const QByteArray &data )
0108 {
0109     Q_UNUSED( job )
0110 
0111     QXmlStreamReader::addData( data );
0112 
0113     // parse more data
0114     continueRead();
0115 }
0116 
0117 void
0118 OpmlParser::downloadResult( KJob *job )
0119 {
0120     // parse more data
0121     continueRead();
0122 
0123     KIO::TransferJob *transferJob = dynamic_cast<KIO::TransferJob *>( job );
0124     if( job->error() || ( transferJob && transferJob->isErrorPage() ) )
0125     {
0126         QString errorMessage =
0127             i18n( "Reading OPML podcast from %1 failed with error:\n", m_url.url() );
0128         errorMessage = errorMessage.append( job->errorString() );
0129 
0130 //        Q_EMIT statusBarErrorMessage( errorMessage );
0131     }
0132 
0133     m_transferJob = nullptr;
0134 }
0135 
0136 void
0137 OpmlParser::slotAbort()
0138 {
0139     DEBUG_BLOCK
0140 }
0141 
0142 void
0143 OpmlParser::Action::begin( OpmlParser *opmlParser ) const
0144 {
0145     if( m_begin )
0146         (( *opmlParser ).*m_begin )();
0147 }
0148 
0149 void
0150 OpmlParser::Action::end( OpmlParser *opmlParser ) const
0151 {
0152     if( m_end )
0153         (( *opmlParser ).*m_end )();
0154 }
0155 
0156 void
0157 OpmlParser::Action::characters( OpmlParser *opmlParser ) const
0158 {
0159     if( m_characters )
0160         (( *opmlParser ).*m_characters )();
0161 }
0162 
0163 // initialization of the feed parser automata:
0164 OpmlParser::StaticData::StaticData()
0165     : startAction( rootMap )
0166     , docAction(
0167         docMap,
0168         nullptr,
0169         &OpmlParser::endDocument )
0170     , skipAction( skipMap )
0171     , noContentAction(
0172             noContentMap,
0173             &OpmlParser::beginNoElement,
0174             nullptr,
0175             &OpmlParser::readNoCharacters )
0176     , opmlAction(
0177             opmlMap,
0178             &OpmlParser::beginOpml )
0179     , headAction(
0180             headMap,
0181             nullptr,
0182             &OpmlParser::endHead )
0183     , titleAction(
0184             textMap,
0185             &OpmlParser::beginText,
0186             &OpmlParser::endTitle,
0187             &OpmlParser::readCharacters )
0188     , bodyAction( bodyMap )
0189     , outlineAction(
0190             outlineMap,
0191             &OpmlParser::beginOutline,
0192             &OpmlParser::endOutline )
0193 {
0194     // known elements:
0195     knownElements[ "opml" ] = Opml;
0196     knownElements[ "html" ] = Html;
0197     knownElements[ "HTML" ] = Html;
0198     knownElements[ "head" ] = Head;
0199     knownElements[ "title" ] = Title;
0200     knownElements[ "dateCreated" ] = DateCreated;
0201     knownElements[ "dateModified" ] = DateModified;
0202     knownElements[ "ownerName" ] = OwnerName;
0203     knownElements[ "ownerEmail" ] = OwnerEmail;
0204     knownElements[ "ownerId" ] = OwnerId;
0205     knownElements[ "docs" ] = Docs;
0206     knownElements[ "expansionState" ] = ExpansionState;
0207     knownElements[ "vertScrollState" ] = VertScrollState;
0208     knownElements[ "windowTop" ] = WindowTop;
0209     knownElements[ "windowLeft" ] = WindowLeft;
0210     knownElements[ "windowBottom" ] = WindowBottom;
0211     knownElements[ "windowRight" ] = WindowRight;
0212     knownElements[ "body" ] = Body;
0213     knownElements[ "outline" ] = Outline;
0214 
0215     // before start document/after end document
0216     rootMap.insert( Document, &docAction );
0217 
0218     // parse document
0219     docMap.insert( Opml, &opmlAction );
0220 //    docMap.insert( Html, &htmlAction );
0221 
0222     // parse <opml>
0223     opmlMap.insert( Head, &headAction );
0224     opmlMap.insert( Body, &bodyAction );
0225 
0226     // parse <head>
0227     headMap.insert( Title, &titleAction );
0228     headMap.insert( DateCreated, &skipAction );
0229     headMap.insert( DateModified, &skipAction );
0230     headMap.insert( OwnerName, &skipAction );
0231     headMap.insert( OwnerEmail, &skipAction );
0232     headMap.insert( OwnerId, &skipAction );
0233     headMap.insert( Docs, &skipAction );
0234     headMap.insert( ExpansionState, &skipAction );
0235     headMap.insert( VertScrollState, &skipAction );
0236     headMap.insert( WindowTop, &skipAction );
0237     headMap.insert( WindowLeft, &skipAction );
0238     headMap.insert( WindowBottom, &skipAction );
0239     headMap.insert( WindowRight, &skipAction );
0240 
0241     // parse <body>
0242     bodyMap.insert( Outline, &outlineAction );
0243 
0244     // parse <outline> in case of sub-elements
0245     outlineMap.insert( Outline, &outlineAction );
0246 
0247     // skip elements
0248     skipMap.insert( Any, &skipAction );
0249 
0250 }
0251 
0252 OpmlParser::ElementType
0253 OpmlParser::elementType() const
0254 {
0255     if( isEndDocument() || isStartDocument() )
0256         return Document;
0257 
0258     if( isCDATA() || isCharacters() )
0259         return CharacterData;
0260 
0261     ElementType elementType = sd.knownElements[ QXmlStreamReader::name().toString()];
0262 
0263     return elementType;
0264 }
0265 
0266 bool
0267 OpmlParser::read()
0268 {
0269     m_buffer.clear();
0270     m_actionStack.clear();
0271     m_actionStack.push( &( OpmlParser::sd.startAction ) );
0272     setNamespaceProcessing( false );
0273 
0274     return continueRead();
0275 }
0276 
0277 bool
0278 OpmlParser::continueRead()
0279 {
0280     // this is some kind of pushdown automata
0281     // with this it should be possible to parse feeds in parallel
0282     // without using threads
0283     DEBUG_BLOCK
0284 
0285     while( !atEnd() && error() != CustomError )
0286     {
0287         TokenType token = readNext();
0288 
0289         if( error() == PrematureEndOfDocumentError && m_transferJob )
0290             return true;
0291 
0292         if( hasError() )
0293         {
0294             Q_EMIT doneParsing();
0295             return false;
0296         }
0297 
0298         if( m_actionStack.isEmpty() )
0299         {
0300             debug() << "expected element on stack!";
0301             return false;
0302         }
0303 
0304         const Action* action = m_actionStack.top();
0305         const Action* subAction = nullptr;
0306 
0307         switch( token )
0308         {
0309             case Invalid:
0310             {
0311                 debug() << "invalid token received at line " << lineNumber();
0312                 debug() << "Error:\n" << errorString();
0313                 return false;
0314             }
0315 
0316             case StartDocument:
0317             case StartElement:
0318                 subAction = action->actionMap()[ elementType() ];
0319 
0320                 if( !subAction )
0321                     subAction = action->actionMap()[ Any ];
0322 
0323                 if( !subAction )
0324                     subAction = &( OpmlParser::sd.skipAction );
0325 
0326                 m_actionStack.push( subAction );
0327 
0328                 subAction->begin( this );
0329                 break;
0330 
0331             case EndDocument:
0332             case EndElement:
0333                 action->end( this );
0334 
0335                 if( m_actionStack.pop() != action )
0336                 {
0337                     debug() << "popped other element than expected!";
0338                 }
0339                 break;
0340 
0341             case Characters:
0342                 if( !isWhitespace() || isCDATA() )
0343                 {
0344                     action->characters( this );
0345                 }
0346 
0347                 // ignorable whitespaces
0348             case Comment:
0349             case EntityReference:
0350             case ProcessingInstruction:
0351             case DTD:
0352             case NoToken:
0353                 // ignore
0354                 break;
0355         }
0356     }
0357 
0358     return !hasError();
0359 }
0360 
0361 void
0362 OpmlParser::stopWithError( const QString &message )
0363 {
0364     raiseError( message );
0365 
0366     if( m_transferJob )
0367     {
0368         m_transferJob->kill( KJob::EmitResult );
0369         m_transferJob = nullptr;
0370     }
0371 
0372     Q_EMIT doneParsing();
0373 }
0374 
0375 void
0376 OpmlParser::beginOpml()
0377 {
0378     m_outlineStack.clear();
0379 }
0380 
0381 void
0382 OpmlParser::beginText()
0383 {
0384     m_buffer.clear();
0385 }
0386 
0387 void
0388 OpmlParser::beginOutline()
0389 {
0390     OpmlOutline *parent = m_outlineStack.empty() ? nullptr : m_outlineStack.top();
0391     OpmlOutline *outline = new OpmlOutline( parent );
0392     //adding outline to stack
0393     m_outlineStack.push( outline );
0394     if( parent )
0395     {
0396         parent->setHasChildren( true );
0397         parent->addChild( outline );
0398     }
0399 
0400     foreach( const QXmlStreamAttribute &attribute, attributes() )
0401         outline->addAttribute( attribute.name().toString(), attribute.value().toString() );
0402 
0403     Q_EMIT outlineParsed( outline );
0404 }
0405 
0406 void
0407 OpmlParser::beginNoElement()
0408 {
0409     debug() << "no element expected here, but got element: " << QXmlStreamReader::name();
0410 }
0411 
0412 void
0413 OpmlParser::endDocument()
0414 {
0415     Q_EMIT doneParsing();
0416 }
0417 
0418 void
0419 OpmlParser::endHead()
0420 {
0421     Q_EMIT headerDone();
0422 }
0423 
0424 void
0425 OpmlParser::endTitle()
0426 {
0427     m_headerData.insert( "title", m_buffer.trimmed() );
0428 }
0429 
0430 void
0431 OpmlParser::endOutline()
0432 {
0433     OpmlOutline *outline = m_outlineStack.pop();
0434     if( m_outlineStack.isEmpty() )
0435         m_outlines << outline;
0436 }
0437 
0438 void
0439 OpmlParser::readCharacters()
0440 {
0441     m_buffer += text();
0442 }
0443 
0444 void
0445 OpmlParser::readNoCharacters()
0446 {
0447     DEBUG_BLOCK
0448     debug() << "no characters expected here";
0449 }