// File indexing completed on 2024-04-21 04:47:52
0001 /**************************************************************************************** 0002 * Copyright (c) 2010 Bart Cerneels <bart.cerneels@kde.org> * 0003 * 2009 Mathias Panzenböck <grosser.meister.morti@gmx.net> * 0004 * * 0005 * This program is free software; you can redistribute it and/or modify it under * 0006 * the terms of the GNU General Public License as published by the Free Software * 0007 * Foundation; either version 2 of the License, or (at your option) any later * 0008 * version. * 0009 * * 0010 * This program is distributed in the hope that it will be useful, but WITHOUT ANY * 0011 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A * 0012 * PARTICULAR PURPOSE. See the GNU General Public License for more details. * 0013 * * 0014 * You should have received a copy of the GNU General Public License along with * 0015 * this program. If not, see <http://www.gnu.org/licenses/>. * 0016 ****************************************************************************************/ 0017 0018 #include "OpmlParser.h" 0019 0020 #include "core/support/Amarok.h" 0021 #include "core/support/Debug.h" 0022 0023 #include <QFile> 0024 #include <QXmlStreamReader> 0025 0026 #include <KLocalizedString> 0027 #include <kio/job.h> 0028 0029 const QString OpmlParser::OPML_MIME = "text/x-opml+xml"; 0030 0031 const OpmlParser::StaticData OpmlParser::sd; 0032 0033 OpmlParser::OpmlParser( const QUrl &url ) 0034 : QObject() 0035 , ThreadWeaver::Job() 0036 , QXmlStreamReader() 0037 , m_url( url ) 0038 { 0039 } 0040 0041 OpmlParser::~OpmlParser() 0042 { 0043 } 0044 0045 void 0046 OpmlParser::run(ThreadWeaver::JobPointer self, ThreadWeaver::Thread *thread) 0047 { 0048 Q_UNUSED(self); 0049 Q_UNUSED(thread); 0050 read( m_url ); 0051 } 0052 0053 void 0054 OpmlParser::defaultBegin(const ThreadWeaver::JobPointer& self, ThreadWeaver::Thread *thread) 0055 { 0056 Q_EMIT started(self); 0057 ThreadWeaver::Job::defaultBegin(self, thread); 0058 } 0059 0060 void 
0061 OpmlParser::defaultEnd(const ThreadWeaver::JobPointer& self, ThreadWeaver::Thread *thread) 0062 { 0063 ThreadWeaver::Job::defaultEnd(self, thread); 0064 if (!self->success()) { 0065 Q_EMIT failed(self); 0066 } 0067 Q_EMIT done(self); 0068 } 0069 0070 bool 0071 OpmlParser::read( const QUrl &url ) 0072 { 0073 m_url = url; 0074 if( m_url.isLocalFile() ) 0075 { 0076 //read directly from local file 0077 QFile localFile( m_url.toLocalFile() ); 0078 if( !localFile.open( QIODevice::ReadOnly ) ) 0079 { 0080 debug() << "failed to open local OPML file " << m_url.url(); 0081 return false; 0082 } 0083 0084 return read( &localFile ); 0085 } 0086 0087 m_transferJob = KIO::get( m_url, KIO::Reload, KIO::HideProgressInfo ); 0088 0089 connect( m_transferJob, &KIO::TransferJob::data, 0090 this, &OpmlParser::slotAddData ); 0091 0092 connect( m_transferJob, &KIO::TransferJob::result, 0093 this, &OpmlParser::downloadResult ); 0094 0095 // parse data 0096 return read(); 0097 } 0098 0099 bool 0100 OpmlParser::read( QIODevice *device ) 0101 { 0102 setDevice( device ); 0103 return read(); 0104 } 0105 0106 void 0107 OpmlParser::slotAddData( KIO::Job *job, const QByteArray &data ) 0108 { 0109 Q_UNUSED( job ) 0110 0111 QXmlStreamReader::addData( data ); 0112 0113 // parse more data 0114 continueRead(); 0115 } 0116 0117 void 0118 OpmlParser::downloadResult( KJob *job ) 0119 { 0120 // parse more data 0121 continueRead(); 0122 0123 KIO::TransferJob *transferJob = dynamic_cast<KIO::TransferJob *>( job ); 0124 if( job->error() || ( transferJob && transferJob->isErrorPage() ) ) 0125 { 0126 QString errorMessage = 0127 i18n( "Reading OPML podcast from %1 failed with error:\n", m_url.url() ); 0128 errorMessage = errorMessage.append( job->errorString() ); 0129 0130 // Q_EMIT statusBarErrorMessage( errorMessage ); 0131 } 0132 0133 m_transferJob = nullptr; 0134 } 0135 0136 void 0137 OpmlParser::slotAbort() 0138 { 0139 DEBUG_BLOCK 0140 } 0141 0142 void 0143 OpmlParser::Action::begin( OpmlParser 
*opmlParser ) const 0144 { 0145 if( m_begin ) 0146 (( *opmlParser ).*m_begin )(); 0147 } 0148 0149 void 0150 OpmlParser::Action::end( OpmlParser *opmlParser ) const 0151 { 0152 if( m_end ) 0153 (( *opmlParser ).*m_end )(); 0154 } 0155 0156 void 0157 OpmlParser::Action::characters( OpmlParser *opmlParser ) const 0158 { 0159 if( m_characters ) 0160 (( *opmlParser ).*m_characters )(); 0161 } 0162 0163 // initialization of the feed parser automata: 0164 OpmlParser::StaticData::StaticData() 0165 : startAction( rootMap ) 0166 , docAction( 0167 docMap, 0168 nullptr, 0169 &OpmlParser::endDocument ) 0170 , skipAction( skipMap ) 0171 , noContentAction( 0172 noContentMap, 0173 &OpmlParser::beginNoElement, 0174 nullptr, 0175 &OpmlParser::readNoCharacters ) 0176 , opmlAction( 0177 opmlMap, 0178 &OpmlParser::beginOpml ) 0179 , headAction( 0180 headMap, 0181 nullptr, 0182 &OpmlParser::endHead ) 0183 , titleAction( 0184 textMap, 0185 &OpmlParser::beginText, 0186 &OpmlParser::endTitle, 0187 &OpmlParser::readCharacters ) 0188 , bodyAction( bodyMap ) 0189 , outlineAction( 0190 outlineMap, 0191 &OpmlParser::beginOutline, 0192 &OpmlParser::endOutline ) 0193 { 0194 // known elements: 0195 knownElements[ "opml" ] = Opml; 0196 knownElements[ "html" ] = Html; 0197 knownElements[ "HTML" ] = Html; 0198 knownElements[ "head" ] = Head; 0199 knownElements[ "title" ] = Title; 0200 knownElements[ "dateCreated" ] = DateCreated; 0201 knownElements[ "dateModified" ] = DateModified; 0202 knownElements[ "ownerName" ] = OwnerName; 0203 knownElements[ "ownerEmail" ] = OwnerEmail; 0204 knownElements[ "ownerId" ] = OwnerId; 0205 knownElements[ "docs" ] = Docs; 0206 knownElements[ "expansionState" ] = ExpansionState; 0207 knownElements[ "vertScrollState" ] = VertScrollState; 0208 knownElements[ "windowTop" ] = WindowTop; 0209 knownElements[ "windowLeft" ] = WindowLeft; 0210 knownElements[ "windowBottom" ] = WindowBottom; 0211 knownElements[ "windowRight" ] = WindowRight; 0212 knownElements[ "body" ] = 
Body; 0213 knownElements[ "outline" ] = Outline; 0214 0215 // before start document/after end document 0216 rootMap.insert( Document, &docAction ); 0217 0218 // parse document 0219 docMap.insert( Opml, &opmlAction ); 0220 // docMap.insert( Html, &htmlAction ); 0221 0222 // parse <opml> 0223 opmlMap.insert( Head, &headAction ); 0224 opmlMap.insert( Body, &bodyAction ); 0225 0226 // parse <head> 0227 headMap.insert( Title, &titleAction ); 0228 headMap.insert( DateCreated, &skipAction ); 0229 headMap.insert( DateModified, &skipAction ); 0230 headMap.insert( OwnerName, &skipAction ); 0231 headMap.insert( OwnerEmail, &skipAction ); 0232 headMap.insert( OwnerId, &skipAction ); 0233 headMap.insert( Docs, &skipAction ); 0234 headMap.insert( ExpansionState, &skipAction ); 0235 headMap.insert( VertScrollState, &skipAction ); 0236 headMap.insert( WindowTop, &skipAction ); 0237 headMap.insert( WindowLeft, &skipAction ); 0238 headMap.insert( WindowBottom, &skipAction ); 0239 headMap.insert( WindowRight, &skipAction ); 0240 0241 // parse <body> 0242 bodyMap.insert( Outline, &outlineAction ); 0243 0244 // parse <outline> in case of sub-elements 0245 outlineMap.insert( Outline, &outlineAction ); 0246 0247 // skip elements 0248 skipMap.insert( Any, &skipAction ); 0249 0250 } 0251 0252 OpmlParser::ElementType 0253 OpmlParser::elementType() const 0254 { 0255 if( isEndDocument() || isStartDocument() ) 0256 return Document; 0257 0258 if( isCDATA() || isCharacters() ) 0259 return CharacterData; 0260 0261 ElementType elementType = sd.knownElements[ QXmlStreamReader::name().toString()]; 0262 0263 return elementType; 0264 } 0265 0266 bool 0267 OpmlParser::read() 0268 { 0269 m_buffer.clear(); 0270 m_actionStack.clear(); 0271 m_actionStack.push( &( OpmlParser::sd.startAction ) ); 0272 setNamespaceProcessing( false ); 0273 0274 return continueRead(); 0275 } 0276 0277 bool 0278 OpmlParser::continueRead() 0279 { 0280 // this is some kind of pushdown automata 0281 // with this it should be 
possible to parse feeds in parallel 0282 // without using threads 0283 DEBUG_BLOCK 0284 0285 while( !atEnd() && error() != CustomError ) 0286 { 0287 TokenType token = readNext(); 0288 0289 if( error() == PrematureEndOfDocumentError && m_transferJob ) 0290 return true; 0291 0292 if( hasError() ) 0293 { 0294 Q_EMIT doneParsing(); 0295 return false; 0296 } 0297 0298 if( m_actionStack.isEmpty() ) 0299 { 0300 debug() << "expected element on stack!"; 0301 return false; 0302 } 0303 0304 const Action* action = m_actionStack.top(); 0305 const Action* subAction = nullptr; 0306 0307 switch( token ) 0308 { 0309 case Invalid: 0310 { 0311 debug() << "invalid token received at line " << lineNumber(); 0312 debug() << "Error:\n" << errorString(); 0313 return false; 0314 } 0315 0316 case StartDocument: 0317 case StartElement: 0318 subAction = action->actionMap()[ elementType() ]; 0319 0320 if( !subAction ) 0321 subAction = action->actionMap()[ Any ]; 0322 0323 if( !subAction ) 0324 subAction = &( OpmlParser::sd.skipAction ); 0325 0326 m_actionStack.push( subAction ); 0327 0328 subAction->begin( this ); 0329 break; 0330 0331 case EndDocument: 0332 case EndElement: 0333 action->end( this ); 0334 0335 if( m_actionStack.pop() != action ) 0336 { 0337 debug() << "popped other element than expected!"; 0338 } 0339 break; 0340 0341 case Characters: 0342 if( !isWhitespace() || isCDATA() ) 0343 { 0344 action->characters( this ); 0345 } 0346 0347 // ignorable whitespaces 0348 case Comment: 0349 case EntityReference: 0350 case ProcessingInstruction: 0351 case DTD: 0352 case NoToken: 0353 // ignore 0354 break; 0355 } 0356 } 0357 0358 return !hasError(); 0359 } 0360 0361 void 0362 OpmlParser::stopWithError( const QString &message ) 0363 { 0364 raiseError( message ); 0365 0366 if( m_transferJob ) 0367 { 0368 m_transferJob->kill( KJob::EmitResult ); 0369 m_transferJob = nullptr; 0370 } 0371 0372 Q_EMIT doneParsing(); 0373 } 0374 0375 void 0376 OpmlParser::beginOpml() 0377 { 0378 
m_outlineStack.clear(); 0379 } 0380 0381 void 0382 OpmlParser::beginText() 0383 { 0384 m_buffer.clear(); 0385 } 0386 0387 void 0388 OpmlParser::beginOutline() 0389 { 0390 OpmlOutline *parent = m_outlineStack.empty() ? nullptr : m_outlineStack.top(); 0391 OpmlOutline *outline = new OpmlOutline( parent ); 0392 //adding outline to stack 0393 m_outlineStack.push( outline ); 0394 if( parent ) 0395 { 0396 parent->setHasChildren( true ); 0397 parent->addChild( outline ); 0398 } 0399 0400 foreach( const QXmlStreamAttribute &attribute, attributes() ) 0401 outline->addAttribute( attribute.name().toString(), attribute.value().toString() ); 0402 0403 Q_EMIT outlineParsed( outline ); 0404 } 0405 0406 void 0407 OpmlParser::beginNoElement() 0408 { 0409 debug() << "no element expected here, but got element: " << QXmlStreamReader::name(); 0410 } 0411 0412 void 0413 OpmlParser::endDocument() 0414 { 0415 Q_EMIT doneParsing(); 0416 } 0417 0418 void 0419 OpmlParser::endHead() 0420 { 0421 Q_EMIT headerDone(); 0422 } 0423 0424 void 0425 OpmlParser::endTitle() 0426 { 0427 m_headerData.insert( "title", m_buffer.trimmed() ); 0428 } 0429 0430 void 0431 OpmlParser::endOutline() 0432 { 0433 OpmlOutline *outline = m_outlineStack.pop(); 0434 if( m_outlineStack.isEmpty() ) 0435 m_outlines << outline; 0436 } 0437 0438 void 0439 OpmlParser::readCharacters() 0440 { 0441 m_buffer += text(); 0442 } 0443 0444 void 0445 OpmlParser::readNoCharacters() 0446 { 0447 DEBUG_BLOCK 0448 debug() << "no characters expected here"; 0449 }