File indexing completed on 2024-04-21 04:47:52

0001 /****************************************************************************************
0002  * Copyright (c) 2008 Nikolaj Hald Nielsen <nhn@kde.org>                                *
0003  * Copyright (c) 2009 Bart Cerneels <bart.cerneels@kde.org>                             *
0004  *                                                                                      *
0005  * This program is free software; you can redistribute it and/or modify it under        *
0006  * the terms of the GNU General Public License as published by the Free Software        *
0007  * Foundation; either version 2 of the License, or (at your option) any later           *
0008  * version.                                                                             *
0009  *                                                                                      *
0010  * This program is distributed in the hope that it will be useful, but WITHOUT ANY      *
0011  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A      *
0012  * PARTICULAR PURPOSE. See the GNU General Public License for more details.             *
0013  *                                                                                      *
0014  * You should have received a copy of the GNU General Public License along with         *
0015  * this program.  If not, see <http://www.gnu.org/licenses/>.                           *
0016  ****************************************************************************************/
0017 
0018 #ifndef OPMLPARSER_H
0019 #define OPMLPARSER_H
0020 
0021 #include "amarok_export.h"
0022 #include "OpmlOutline.h"
0023 
0024 #include <QHash>
0025 #include <QMap>
0026 #include <QStack>
0027 #include <QStringList>
0028 #include <QUrl>
0029 #include <QXmlStreamReader>
0030 
0031 #include <KJob>
0032 #include <ThreadWeaver/Job>
0033 
0034 namespace KIO // forward declarations only; no KIO header is included by this file
0035 {
0036     class Job;         // named in the signature of the slotAddData() slot
0037     class TransferJob; // pointee type of the m_transferJob member
0038 }
0039 
0040 /**
0041 * Parser for OPML files. It is a QObject (signals/slots), a ThreadWeaver::Job and a QXmlStreamReader at once.
0042 */
0043 class AMAROK_EXPORT OpmlParser : public QObject, public ThreadWeaver::Job, public QXmlStreamReader
0044 {
0045     Q_OBJECT
0046 
0047 public:
0048     static const QString OPML_MIME; // NOTE(review): presumably the MIME type of OPML documents; the value is not visible in this header
0049     /**
0050      * Constructor. Declared explicit, so a QUrl is never implicitly converted into a parser.
0051      * @param url The address of the OPML document to parse
0052      * NOTE(review): url is presumably stored in m_url and returned by url() - confirm in the .cpp
0053      */
0054     explicit OpmlParser( const QUrl &url );
0055 
0056     /**
0057      * Destructor.
0058      * Overrides the virtual destructor inherited from the base classes.
0059      */
0060     ~OpmlParser() override;
0061 
0062     /**
0063     * The function that starts the actual work. Inherited from ThreadWeaver::Job
0064     * Note the work is performed in a separate thread
0065     * Returns nothing (void). NOTE(review): the outcome is presumably reported through the done()/failed() signals - confirm in the .cpp
0066     */
0067     void run(ThreadWeaver::JobPointer self = QSharedPointer<ThreadWeaver::Job>(), ThreadWeaver::Thread *thread = nullptr) override;
0068 
0069     bool read( const QUrl &url ); // NOTE(review): presumably fetches url and parses it; meaning of the bool result is not visible here
0070     bool read( QIODevice *device ); // NOTE(review): presumably parses the OPML read from device; meaning of the bool result is not visible here
0071 
0072     /** @return the URL of the OPML being parsed (a copy of m_url).
0073     */
0074     QUrl url() const { return m_url; }
0075 
0076     QMap<QString,QString> headerData() const { return m_headerData; } // returns a copy of m_headerData
0077 
0078     /**
0079      * Get the result of the parsing as a list of OpmlOutlines.
0080      * This list contains only root outlines that can be found in the \<body\> of the OPML.
0081      * The rest are children of these root items.
0082      *
0083      * The caller is responsible for deleting the returned outlines.
0084      */
0085     QList<OpmlOutline *> results() const { return m_outlines; }
0086 
0087 protected:
0088     void defaultBegin(const ThreadWeaver::JobPointer& job, ThreadWeaver::Thread *thread) override; // override of an inherited begin hook
0089     void defaultEnd(const ThreadWeaver::JobPointer& job, ThreadWeaver::Thread *thread) override; // override of an inherited end hook
0090 
0091 Q_SIGNALS:
0092 
0093     /**
0094      * Emitted when \<head\> has been completely parsed.
0095      */
0096     void headerDone();
0097 
0098     /**
0099      * Signal emitted when parsing is complete.
0100      * The data is complete now and accessible via results().
0101      * Children of all the outlines are available via OpmlOutline::children().
0102      */
0103     void doneParsing();
0104 
0105     /**
0106      * Emitted when a new outline item is available.
0107      * Emitted after the attributes have been read but before any of the children are available.
0108      * Each child will be reported in a separate signal.
0109      */
0110     void outlineParsed( OpmlOutline *outline );
0111 
0112     /** This signal is emitted when this job is being processed by a thread. */
0113     void started(ThreadWeaver::JobPointer);
0114     /** This signal is emitted when the job has been finished (no matter if it succeeded or not). */
0115     void done(ThreadWeaver::JobPointer);
0116     /** This job has failed.
0117      * This signal is emitted when success() returns false after the job is executed. */
0118     void failed(ThreadWeaver::JobPointer);
0119 
0120 public Q_SLOTS:
0121     virtual void slotAbort(); // NOTE(review): presumably cancels the work in progress - confirm in the .cpp
0122 
0123 private Q_SLOTS:
0124     void slotAddData( KIO::Job *, const QByteArray &data ); // NOTE(review): presumably receives a chunk of downloaded bytes from the transfer job - confirm
0125 
0126     void downloadResult( KJob * ); // NOTE(review): presumably handles completion of the download job - confirm
0127 
0128 private:
0129     enum ElementType // element kinds the parser distinguishes; key type of ActionMap and value type of StaticData::knownElements
0130     {
0131         Unknown = 0,
0132         Any,
0133         Document,
0134         CharacterData,
0135         Opml,
0136         Html,
0137         Head,
0138         Title,
0139         DateCreated,
0140         DateModified,
0141         OwnerName,
0142         OwnerEmail,
0143         OwnerId,
0144         Docs,
0145         ExpansionState,
0146         VertScrollState,
0147         WindowTop,
0148         WindowLeft,
0149         WindowBottom,
0150         WindowRight,
0151         Body,
0152         Outline
0153     };
0154 
0155     class Action; // forward declaration: the ActionMap typedef below stores Action pointers
0156     typedef void (OpmlParser::*ActionCallback)(); // pointer to an OpmlParser member function taking no arguments and returning void
0157     typedef QHash<ElementType, Action*> ActionMap; // ElementType -> Action pointer lookup table
0158 
0159     class Action // pairs a reference to an ActionMap with up to three optional callbacks (begin, end, characters)
0160     {
0161         public:
0162             explicit Action( ActionMap &actionMap ) // no callbacks: all three stay nullptr
0163                 : m_actionMap( actionMap )
0164                 , m_begin( nullptr )
0165                 , m_end( nullptr )
0166                 , m_characters( nullptr ) {}
0167 
0168             Action(ActionMap &actionMap, ActionCallback begin) // begin callback only
0169                 : m_actionMap( actionMap )
0170                 , m_begin( begin )
0171                 , m_end( nullptr )
0172                 , m_characters( nullptr ) {}
0173 
0174             Action(ActionMap &actionMap, ActionCallback begin, ActionCallback end) // begin and end callbacks
0175                 : m_actionMap( actionMap )
0176                 , m_begin( begin )
0177                 , m_end( end )
0178                 , m_characters( nullptr ) {}
0179 
0180             Action(ActionMap &actionMap, ActionCallback begin, // all three callbacks
0181                     ActionCallback end, ActionCallback characters)
0182                 : m_actionMap( actionMap )
0183                 , m_begin( begin )
0184                 , m_end( end )
0185                 , m_characters( characters ) {}
0186 
0187             void begin( OpmlParser *opmlParser ) const; // NOTE(review): begin()/end()/characters() are defined elsewhere; presumably each invokes the matching callback on opmlParser when it is non-null - confirm
0188             void end( OpmlParser *opmlParser ) const;
0189             void characters( OpmlParser *opmlParser ) const;
0190 
0191             const ActionMap &actionMap() const { return m_actionMap; } // the map this Action was constructed with
0192 
0193         private:
0194             ActionMap &m_actionMap; // held by reference: the referenced map must outlive this Action
0195             ActionCallback m_begin; // each callback may be nullptr (see the constructors above)
0196             ActionCallback m_end;
0197             ActionCallback m_characters;
0198     };
0199 
0200     ElementType elementType() const; // NOTE(review): presumably classifies the reader's current token - confirm in the .cpp
0201     bool read(); // NOTE(review): read()/continueRead() are defined elsewhere; presumably start and resume the token loop - confirm
0202     bool continueRead();
0203 
0204     // callback methods for parsing; the begin*/end*/read* methods below all match the ActionCallback signature
0205     void beginOpml();
0206     void beginText();
0207     void beginOutline();
0208     void beginNoElement();
0209 
0210     void endDocument();
0211     void endHead();
0212     void endTitle();
0213     void endOutline();
0214 
0215     void readCharacters();
0216     void readNoCharacters();
0217 
0218     void stopWithError( const QString &message ); // NOTE(review): presumably stops parsing and reports message as the error - confirm in the .cpp
0219 
0220     class StaticData { // lookup table, Actions and ActionMaps shared by every parser through the static member sd
0221         public:
0222             StaticData();
0223 
0224             // This basically builds an automaton.
0225             // This way feed parsing can be paused after any token,
0226             // thus enabling parallel download and parsing of multiple
0227             // feeds without the need for threads.
0228 
0229             QHash<QString, ElementType> knownElements; // string -> ElementType lookup (presumably keyed by XML element name)
0230 
0231             // Actions
0232             Action startAction;
0233 
0234             Action docAction;
0235             Action skipAction;
0236             Action noContentAction;
0237 
0238             Action opmlAction;
0239 
0240             Action headAction;
0241             Action titleAction;
0242 //            Action dateCreatedAction;
0243 //            Action dateModifiedAction;
0244 //            Action ownerNameAction;
0245 //            Action ownerEmailAction;
0246 //            Action ownerIdAction;
0247 //            Action docsAction;
0248 //            Action expansionStateAction;
0249             Action bodyAction;
0250             Action outlineAction;
0251 
0252             ActionMap rootMap; // each map below associates ElementType values with Action pointers
0253             ActionMap skipMap;
0254             ActionMap noContentMap;
0255             ActionMap xmlMap;
0256 
0257             ActionMap docMap;
0258             ActionMap opmlMap;
0259             ActionMap headMap;
0260             ActionMap bodyMap;
0261             ActionMap outlineMap;
0262             ActionMap textMap;
0263     };
0264 
0265     static const StaticData sd; // single const instance shared by all OpmlParser objects
0266 
0267     QStack<const Action*> m_actionStack; // NOTE(review): presumably one entry per element currently being processed - confirm
0268 
0269     QString m_buffer; // NOTE(review): presumably accumulates character data between tags - confirm
0270 
0271     QMap<QString,QString> m_headerData; // returned (by value) from headerData()
0272     // the top level outlines of <body>.
0273     QList<OpmlOutline *> m_outlines;
0274 
0275     // outlines currently being processed, kept on a stack so nested outlines can be handled.
0276     QStack<OpmlOutline *> m_outlineStack;
0277 
0278     QUrl m_url; // returned (by value) from url()
0279     KIO::TransferJob *m_transferJob; // NOTE(review): raw pointer with no in-class initializer; confirm every constructor path initializes it
0280 };
0281 
0282 #endif