File indexing completed on 2024-05-05 16:10:16
0001 /* 0002 This file is part of the KDE libraries 0003 0004 Copyright (C) 1997 Martin Jones (mjones@kde.org) 0005 (C) 1997 Torben Weis (weis@kde.org) 0006 (C) 1998 Waldo Bastian (bastian@kde.org) 0007 (C) 1999 Lars Knoll (knoll@kde.org) 0008 (C) 2003 Apple Computer, Inc. 0009 0010 This library is free software; you can redistribute it and/or 0011 modify it under the terms of the GNU Library General Public 0012 License as published by the Free Software Foundation; either 0013 version 2 of the License, or (at your option) any later version. 0014 0015 This library is distributed in the hope that it will be useful, 0016 but WITHOUT ANY WARRANTY; without even the implied warranty of 0017 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0018 Library General Public License for more details. 0019 0020 You should have received a copy of the GNU Library General Public License 0021 along with this library; see the file COPYING.LIB. If not, write to 0022 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 0023 Boston, MA 02110-1301, USA. 0024 */ 0025 //---------------------------------------------------------------------------- 0026 // 0027 // KDE HTML Widget -- HTML Parser 0028 0029 #ifndef HTMLPARSER_H 0030 #define HTMLPARSER_H 0031 0032 // 0 all 0033 // 1 domtree + rendertree + styleForElement, no layouting 0034 // 2 domtree only 0035 #define SPEED_DEBUG 0 0036 0037 #ifdef SPEED_DEBUG 0038 #include <QDateTime> 0039 #endif 0040 0041 #include "dom/dom_string.h" 0042 #include "xml/dom_nodeimpl.h" 0043 #include "html/html_documentimpl.h" 0044 #include "html/html_headimpl.h" 0045 0046 class KHTMLView; 0047 class HTMLStackElem; 0048 0049 namespace DOM 0050 { 0051 class HTMLDocumentImpl; 0052 class DocumentImpl; 0053 class NodeImpl; 0054 class HTMLFormElementImpl; 0055 class HTMLMapElementImpl; 0056 class HTMLHeadElementImpl; 0057 class DocumentFragmentImpl; 0058 } 0059 0060 namespace khtml 0061 { 0062 0063 class Token; 0064 class DoctypeToken; 0065 0066 /** 0067 * The parser for html. It receives a stream of tokens from the HTMLTokenizer, and 0068 * builds up the Document structure form it. 0069 */ 0070 class KHTMLParser 0071 { 0072 public: 0073 KHTMLParser(KHTMLView *w, DOM::DocumentImpl *i); 0074 KHTMLParser(DOM::DocumentFragmentImpl *frag, DOM::DocumentImpl *doc); 0075 virtual ~KHTMLParser(); 0076 0077 /** 0078 * parses one token delivered by the tokenizer 0079 */ 0080 void parseToken(Token *_t); 0081 0082 /** 0083 * parses a doctype token delivered by the tokenizer 0084 */ 0085 void parseDoctypeToken(DoctypeToken *_t); 0086 0087 /** 0088 * resets the parser 0089 */ 0090 void reset(); 0091 0092 bool skipMode() const 0093 { 0094 return (discard_until != 0); 0095 } 0096 bool noSpaces() const 0097 { 0098 return (inSelect || !m_inline || !inBody); 0099 } 0100 bool selectMode() const 0101 { 0102 return inSelect; 0103 } 0104 0105 DOM::HTMLDocumentImpl *doc() const 0106 { 0107 return static_cast<DOM::HTMLDocumentImpl *>(document); 0108 } 0109 DOM::DocumentImpl *docPtr() const 0110 { 0111 return document; 0112 } 0113 0114 DOM::HTMLScriptElementImpl *currentScriptElement() const 0115 { 0116 return (current && current->id() == ID_SCRIPT) ? static_cast<DOM::HTMLScriptElementImpl *>(current) : nullptr; 0117 } 0118 0119 protected: 0120 0121 KHTMLView *HTMLWidget; 0122 DOM::DocumentImpl *document; 0123 0124 /* 0125 * generate an element from the token 0126 */ 0127 DOM::NodeImpl *getElement(Token *); 0128 0129 void processCloseTag(Token *); 0130 0131 bool insertNode(DOM::NodeImpl *n, bool flat = false); 0132 0133 /* 0134 * The currently active element (the one new elements will be added to) 0135 */ 0136 void setCurrent(DOM::NodeImpl *newNode) 0137 { 0138 if (newNode) { 0139 newNode->ref(); 0140 } 0141 if (current) { 0142 current->deref(); 0143 } 0144 current = newNode; 0145 } 0146 0147 private: 0148 DOM::NodeImpl *current; 0149 0150 HTMLStackElem *blockStack; 0151 0152 void pushBlock(int _id, int _level); 0153 0154 void generateImpliedEndTags(int _id); 0155 void popOptionalBlock(int _id); 0156 void popBlock(int _id); 0157 void popOneBlock(bool delBlock = true); 0158 void popInlineBlocks(); 0159 bool isElementInScope(int _id); 0160 bool isHeadingInScope(); 0161 0162 void freeBlock(void); 0163 0164 void createHead(); 0165 0166 bool isResidualStyleTag(int _id); 0167 bool isAffectedByResidualStyle(int _id); 0168 void handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem *elem); 0169 void reopenResidualStyleTags(HTMLStackElem *elem, DOM::NodeImpl *malformedTableParent); 0170 0171 ushort *forbiddenTag; 0172 0173 /* 0174 * currently active form 0175 */ 0176 DOM::HTMLFormElementImpl *form; 0177 0178 /* 0179 * current map 0180 */ 0181 DOM::HTMLMapElementImpl *map; 0182 0183 /* 0184 * the head element. Needed for crappy html which defines <base> after </head> 0185 */ 0186 RefPtr<DOM::HTMLHeadElementImpl> head; 0187 0188 /* 0189 * a possible <isindex> element in the head. Compatibility hack for 0190 * html from the stone age 0191 */ 0192 DOM::NodeImpl *isindex; 0193 DOM::NodeImpl *handleIsindex(Token *t); 0194 0195 /* 0196 * inserts the stupid isIndex element. 0197 */ 0198 void startBody(); 0199 0200 bool inBody; 0201 bool haveContent; 0202 bool haveBody; 0203 bool haveFrameSet; 0204 bool haveTitle; 0205 bool m_inline; 0206 bool end; 0207 bool inSelect; 0208 0209 /* 0210 * tells the parser to discard all tags, until it reaches the one specified 0211 */ 0212 int discard_until; 0213 0214 bool headLoaded; 0215 int inStrayTableContent; 0216 0217 #if SPEED_DEBUG > 0 0218 QTime qt; 0219 #endif 0220 }; 0221 0222 } // namespace khtml 0223 0224 #endif // HTMLPARSER_H 0225