File indexing completed on 2024-04-28 11:39:39

0001 /*
0002  * This file is part of the DOM implementation for KDE.
0003  *
0004  * Copyright (C) 2000 Peter Kelly (pmk@post.com)
0005  *
0006  * This library is free software; you can redistribute it and/or
0007  * modify it under the terms of the GNU Library General Public
0008  * License as published by the Free Software Foundation; either
0009  * version 2 of the License, or (at your option) any later version.
0010  *
0011  * This library is distributed in the hope that it will be useful,
0012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0014  * Library General Public License for more details.
0015  *
0016  * You should have received a copy of the GNU Library General Public License
0017  * along with this library; see the file COPYING.LIB.  If not, write to
0018  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0019  * Boston, MA 02110-1301, USA.
0020  *
0021  */
0022 
0023 #ifndef _XML_Tokenizer_h_
0024 #define _XML_Tokenizer_h_
0025 
0026 #include <qxml.h>
0027 #include <QStack>
0028 #include <QLinkedList>
0029 #include <QMap>
0030 #include <QObject>
0031 #include "misc/loader_client.h"
0032 #include "misc/stringit.h"
0033 
// Forward declarations: this header only refers to these types through
// pointers, so the full definitions are not required here.
class KHTMLView;

namespace khtml
{
class CachedObject;
class CachedScript;
}

namespace DOM
{
// Each type is declared exactly once (the previous list repeated
// DocumentImpl and HTMLScriptElementImpl).
class DocumentImpl;
class NodeImpl;
class HTMLScriptElementImpl;
}
0050 
0051 namespace khtml
0052 {
0053 
0054 class XMLHandler : public QXmlDefaultHandler
0055 {
0056 public:
0057     XMLHandler(DOM::DocumentImpl *_doc, KHTMLView *_view);
0058     virtual ~XMLHandler();
0059 
0060     // return the error protocol if parsing failed
0061     QString errorProtocol();
0062 
0063     // overloaded handler functions
0064     bool startDocument() override;
0065     bool startElement(const QString &namespaceURI, const QString &localName, const QString &qName, const QXmlAttributes &atts) override;
0066     bool endElement(const QString &namespaceURI, const QString &localName, const QString &qName) override;
0067     bool startCDATA() override;
0068     bool endCDATA() override;
0069     bool characters(const QString &ch) override;
0070     bool comment(const QString &ch) override;
0071     bool processingInstruction(const QString &target, const QString &data) override;
0072     bool startDTD(const QString &name, const QString &publicId, const QString &systemId) override;
0073     bool endDTD() override;
0074 
0075     // namespace handling, to workaround problem in QXML where some attributes
0076     // do not get the namespace resolved properly
0077     bool startPrefixMapping(const QString &prefix, const QString &uri) override;
0078     bool endPrefixMapping(const QString &prefix) override;
0079     void fixUpNSURI(QString &uri, const QString &qname);
0080     QMap<QString, QStack<QString> > namespaceInfo;
0081 
0082     // from QXmlDeclHandler
0083     bool attributeDecl(const QString &eName, const QString &aName, const QString &type, const QString &valueDefault, const QString &value) override;
0084     bool externalEntityDecl(const QString &name, const QString &publicId, const QString &systemId) override;
0085     bool internalEntityDecl(const QString &name, const QString &value) override;
0086 
0087     // from QXmlDTDHandler
0088     bool notationDecl(const QString &name, const QString &publicId, const QString &systemId) override;
0089     bool unparsedEntityDecl(const QString &name, const QString &publicId, const QString &systemId, const QString &notationName) override;
0090 
0091     bool enterText();
0092     void exitText();
0093 
0094     QString errorString() const override;
0095 
0096     bool fatalError(const QXmlParseException &exception) override;
0097 
0098     int errorLine;
0099     int errorCol;
0100 
0101 private:
0102     void pushNode(DOM::NodeImpl *node);
0103     DOM::NodeImpl *popNode();
0104     DOM::NodeImpl *currentNode() const;
0105 private:
0106     QString errorProt;
0107     DOM::DocumentImpl *m_doc;
0108     KHTMLView *m_view;
0109     QStack<DOM::NodeImpl *> m_nodes;
0110     DOM::NodeImpl *m_rootNode;
0111 
0112     enum State {
0113         StateInit,
0114         StateDocument,
0115         StateQuote,
0116         StateLine,
0117         StateHeading,
0118         StateP
0119     };
0120     State state;
0121 };
0122 
0123 class Tokenizer : public QObject
0124 {
0125     Q_OBJECT
0126 public:
0127     virtual void begin() = 0;
0128     // script output must be prepended, while new data
0129     // received during executing a script must be appended, hence the
0130     // extra bool to be able to distinguish between both cases. document.write()
0131     // always uses false, while khtmlpart uses true
0132     virtual void write(const TokenizerString &str, bool appendData) = 0;
0133     virtual void end() = 0;
0134     virtual void finish() = 0;
0135     virtual void setOnHold(bool /*_onHold*/) {}
0136     virtual bool isWaitingForScripts() const = 0;
0137     virtual bool isExecutingScript() const = 0;
0138     virtual void setNormalYieldDelay() {}
0139     virtual void abort() {}
0140 
0141     virtual void executeScriptsWaitingForStylesheets() = 0;
0142 
0143 Q_SIGNALS:
0144     void finishedParsing();
0145 
0146 };
0147 
0148 class XMLIncrementalSource : public QXmlInputSource
0149 {
0150 public:
0151     XMLIncrementalSource();
0152     void fetchData() override;
0153     QChar next() override;
0154     void setData(const QString &str) override;
0155     void setData(const QByteArray &data) override;
0156     QString data() const override;
0157 
0158     void appendXML(const QString &str);
0159     void setFinished(bool);
0160 
0161     inline void setPaused(bool paused = true)
0162     {
0163         m_paused = paused;
0164     }
0165 
0166 private:
0167     QString      m_data;
0168     int          m_pos;
0169     const QChar *m_unicode;
0170     bool         m_finished;
0171     bool         m_paused; // if waiting for scripts
0172 };
0173 
0174 class XMLTokenizer : public Tokenizer, public khtml::CachedObjectClient
0175 {
0176 public:
0177     XMLTokenizer(DOM::DocumentImpl *, KHTMLView * = nullptr);
0178     virtual ~XMLTokenizer();
0179     void begin() override;
0180     void write(const TokenizerString &str, bool) override;
0181     void end() override;
0182     void finish() override;
0183 
0184     // from CachedObjectClient
0185     void notifyFinished(khtml::CachedObject *finishedObj) override;
0186 
0187     void executeScriptsWaitingForStylesheets() override {}
0188 
0189     bool isWaitingForScripts() const override;
0190     bool isExecutingScript() const override
0191     {
0192         return m_executingScript;
0193     }
0194 
0195     // execute script in place, if it contains src attribute we stop parsing till it's downloaded
0196     void executeScript(DOM::NodeImpl *n);
0197 
0198 protected:
0199     DOM::DocumentImpl *m_doc;
0200     KHTMLView *m_view;
0201 
0202     khtml::CachedScript *m_cachedScript;
0203 
0204     QString m_bufferedData;
0205 
0206     XMLHandler m_handler;
0207     QXmlSimpleReader m_reader;
0208     XMLIncrementalSource m_source;
0209     bool m_noErrors;
0210     bool m_executingScript;
0211     bool m_explicitFinishParsingNeeded;
0212     bool m_insideWrite;
0213 };
0214 
0215 } // end namespace
0216 
0217 #endif