File indexing completed on 2024-05-05 16:10:16

0001 /*
0002     This file is part of the KDE libraries
0003 
0004     Copyright (C) 1997 Martin Jones (mjones@kde.org)
0005               (C) 1997 Torben Weis (weis@kde.org)
0006               (C) 1998 Waldo Bastian (bastian@kde.org)
0007               (C) 1999 Lars Knoll (knoll@kde.org)
0008               (C) 2003 Apple Computer, Inc.
0009 
0010     This library is free software; you can redistribute it and/or
0011     modify it under the terms of the GNU Library General Public
0012     License as published by the Free Software Foundation; either
0013     version 2 of the License, or (at your option) any later version.
0014 
0015     This library is distributed in the hope that it will be useful,
0016     but WITHOUT ANY WARRANTY; without even the implied warranty of
0017     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0018     Library General Public License for more details.
0019 
0020     You should have received a copy of the GNU Library General Public License
0021     along with this library; see the file COPYING.LIB.  If not, write to
0022     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0023     Boston, MA 02110-1301, USA.
0024 */
0025 //----------------------------------------------------------------------------
0026 //
0027 // KDE HTML Widget -- HTML Parser
0028 
0029 #ifndef HTMLPARSER_H
0030 #define HTMLPARSER_H
0031 
// Parser speed-debugging level:
//   0  all
//   1  domtree + rendertree + styleForElement, no layouting
//   2  domtree only
#define SPEED_DEBUG 0

// QTime is only needed by the timing member of KHTMLParser, which is declared
// under the same "#if SPEED_DEBUG > 0" condition. A plain "#ifdef" would always
// be true here because the macro is defined unconditionally just above.
#if SPEED_DEBUG > 0
#include <QDateTime>
#endif
0040 
0041 #include "dom/dom_string.h"
0042 #include "xml/dom_nodeimpl.h"
0043 #include "html/html_documentimpl.h"
0044 #include "html/html_headimpl.h"
0045 
// Forward declarations at global scope; this header only uses pointers to
// these types.
class HTMLStackElem;
class KHTMLView;

// Forward declarations of the DOM classes named in this header.
namespace DOM
{
class DocumentFragmentImpl;
class DocumentImpl;
class HTMLDocumentImpl;
class HTMLFormElementImpl;
class HTMLHeadElementImpl;
class HTMLMapElementImpl;
class NodeImpl;
}
0059 
0060 namespace khtml
0061 {
0062 
// Token types accepted by the parser entry points; not defined in this header.
class DoctypeToken;
class Token;
0065 
0066 /**
0067  * The parser for html. It receives a stream of tokens from the HTMLTokenizer, and
0068  * builds up the Document structure form it.
0069  */
0070 class KHTMLParser
0071 {
0072 public:
0073     KHTMLParser(KHTMLView *w, DOM::DocumentImpl *i);
0074     KHTMLParser(DOM::DocumentFragmentImpl *frag, DOM::DocumentImpl *doc);
0075     virtual ~KHTMLParser();
0076 
0077     /**
0078      * parses one token delivered by the tokenizer
0079      */
0080     void parseToken(Token *_t);
0081 
0082     /**
0083      * parses a doctype token delivered by the tokenizer
0084      */
0085     void parseDoctypeToken(DoctypeToken *_t);
0086 
0087     /**
0088      * resets the parser
0089      */
0090     void reset();
0091 
0092     bool skipMode() const
0093     {
0094         return (discard_until != 0);
0095     }
0096     bool noSpaces() const
0097     {
0098         return (inSelect || !m_inline  || !inBody);
0099     }
0100     bool selectMode() const
0101     {
0102         return inSelect;
0103     }
0104 
0105     DOM::HTMLDocumentImpl *doc() const
0106     {
0107         return static_cast<DOM::HTMLDocumentImpl *>(document);
0108     }
0109     DOM::DocumentImpl *docPtr() const
0110     {
0111         return document;
0112     }
0113 
0114     DOM::HTMLScriptElementImpl *currentScriptElement() const
0115     {
0116         return (current && current->id() == ID_SCRIPT) ? static_cast<DOM::HTMLScriptElementImpl *>(current) : nullptr;
0117     }
0118 
0119 protected:
0120 
0121     KHTMLView *HTMLWidget;
0122     DOM::DocumentImpl *document;
0123 
0124     /*
0125      * generate an element from the token
0126      */
0127     DOM::NodeImpl *getElement(Token *);
0128 
0129     void processCloseTag(Token *);
0130 
0131     bool insertNode(DOM::NodeImpl *n, bool flat = false);
0132 
0133     /*
0134      * The currently active element (the one new elements will be added to)
0135      */
0136     void setCurrent(DOM::NodeImpl *newNode)
0137     {
0138         if (newNode) {
0139             newNode->ref();
0140         }
0141         if (current) {
0142             current->deref();
0143         }
0144         current = newNode;
0145     }
0146 
0147 private:
0148     DOM::NodeImpl *current;
0149 
0150     HTMLStackElem *blockStack;
0151 
0152     void pushBlock(int _id, int _level);
0153 
0154     void generateImpliedEndTags(int _id);
0155     void popOptionalBlock(int _id);
0156     void popBlock(int _id);
0157     void popOneBlock(bool delBlock = true);
0158     void popInlineBlocks();
0159     bool isElementInScope(int _id);
0160     bool isHeadingInScope();
0161 
0162     void freeBlock(void);
0163 
0164     void createHead();
0165 
0166     bool isResidualStyleTag(int _id);
0167     bool isAffectedByResidualStyle(int _id);
0168     void handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem *elem);
0169     void reopenResidualStyleTags(HTMLStackElem *elem, DOM::NodeImpl *malformedTableParent);
0170 
0171     ushort *forbiddenTag;
0172 
0173     /*
0174      * currently active form
0175      */
0176     DOM::HTMLFormElementImpl *form;
0177 
0178     /*
0179      * current map
0180      */
0181     DOM::HTMLMapElementImpl *map;
0182 
0183     /*
0184      * the head element. Needed for crappy html which defines <base> after </head>
0185      */
0186     RefPtr<DOM::HTMLHeadElementImpl> head;
0187 
0188     /*
0189      * a possible <isindex> element in the head. Compatibility hack for
0190      * html from the stone age
0191      */
0192     DOM::NodeImpl *isindex;
0193     DOM::NodeImpl *handleIsindex(Token *t);
0194 
0195     /*
0196      * inserts the stupid isIndex element.
0197      */
0198     void startBody();
0199 
0200     bool inBody;
0201     bool haveContent;
0202     bool haveBody;
0203     bool haveFrameSet;
0204     bool haveTitle;
0205     bool m_inline;
0206     bool end;
0207     bool inSelect;
0208 
0209     /*
0210      * tells the parser to discard all tags, until it reaches the one specified
0211      */
0212     int discard_until;
0213 
0214     bool headLoaded;
0215     int inStrayTableContent;
0216 
0217 #if SPEED_DEBUG > 0
0218     QTime qt;
0219 #endif
0220 };
0221 
0222 } // namespace khtml
0223 
0224 #endif // HTMLPARSER_H
0225