File indexing completed on 2024-05-05 16:10:16
0001 /* 0002 This file is part of the KDE libraries 0003 0004 Copyright (C) 1997 Martin Jones (mjones@kde.org) 0005 (C) 1997 Torben Weis (weis@kde.org) 0006 (C) 1999,2001 Lars Knoll (knoll@kde.org) 0007 (C) 2000,2001 Dirk Mueller (mueller@kde.org) 0008 (C) 2003 Apple Computer, Inc. 0009 0010 This library is free software; you can redistribute it and/or 0011 modify it under the terms of the GNU Library General Public 0012 License as published by the Free Software Foundation; either 0013 version 2 of the License, or (at your option) any later version. 0014 0015 This library is distributed in the hope that it will be useful, 0016 but WITHOUT ANY WARRANTY; without even the implied warranty of 0017 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0018 Library General Public License for more details. 0019 0020 You should have received a copy of the GNU Library General Public License 0021 along with this library; see the file COPYING.LIB. If not, write to 0022 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 0023 Boston, MA 02110-1301, USA. 0024 */ 0025 //---------------------------------------------------------------------------- 0026 // 0027 // KDE HTML Widget -- HTML Parser 0028 // #define PARSER_DEBUG 0029 0030 #include "htmlparser.h" 0031 0032 #include <dom/dom_exception.h> 0033 0034 #include <html/html_baseimpl.h> 0035 #include <html/html_blockimpl.h> 0036 #include <html/html_canvasimpl.h> 0037 #include <html/html_documentimpl.h> 0038 #include <html/html_elementimpl.h> 0039 #include <html/html_formimpl.h> 0040 #include <html/html_headimpl.h> 0041 #include <html/html_imageimpl.h> 0042 #include <html/html_inlineimpl.h> 0043 #include <html/html_listimpl.h> 0044 #include <html/html_miscimpl.h> 0045 #include <html/html_tableimpl.h> 0046 #include <html/html_objectimpl.h> 0047 #include <html/HTMLAudioElement.h> 0048 #include <html/HTMLVideoElement.h> 0049 #include <html/HTMLSourceElement.h> 0050 #include <xml/dom_textimpl.h> 0051 #include <xml/dom_nodeimpl.h> 0052 #include <html/htmltokenizer.h> 0053 #include <khtmlview.h> 0054 #include <khtml_part.h> 0055 #include <khtml_global.h> 0056 #include <css/cssproperties.h> 0057 #include <css/cssvalues.h> 0058 0059 #include <rendering/render_object.h> 0060 0061 #include "khtml_debug.h" 0062 #include <klocalizedstring.h> 0063 0064 // Turn off gnu90 inlining to avoid linker errors 0065 #undef __GNUC_STDC_INLINE__ 0066 #undef __GNUC_GNU_INLINE__ 0067 #include <doctypes.h> 0068 0069 #undef OPTIONAL // for win32, MinGW 0070 0071 using namespace DOM; 0072 using namespace khtml; 0073 0074 #ifdef PARSER_DEBUG 0075 static QString getParserPrintableName(int id) 0076 { 0077 if (id >= ID_CLOSE_TAG) { 0078 return "/" + getPrintableName(id - ID_CLOSE_TAG); 0079 } else { 0080 return getPrintableName(id); 0081 } 0082 } 0083 #endif 0084 0085 //---------------------------------------------------------------------------- 0086 0087 /** 0088 * @internal 0089 */ 0090 class HTMLStackElem 0091 { 0092 public: 0093 HTMLStackElem(int _id, 0094 int _level, 0095 DOM::NodeImpl *_node, 0096 bool _inline_, 0097 HTMLStackElem *_next) 0098 : 0099 id(_id), 0100 level(_level), 0101 strayTableContent(false), 0102 m_inline(_inline_), 0103 node(_node), 0104 next(_next) 0105 { 0106 node->ref(); 0107 } 0108 0109 ~HTMLStackElem() 0110 { 0111 node->deref(); 0112 } 0113 0114 void setNode(NodeImpl *newNode) 0115 { 0116 newNode->ref(); 0117 node->deref(); 0118 node = newNode; 0119 } 0120 0121 int id; 0122 int level; 0123 bool strayTableContent; 0124 bool m_inline; 0125 NodeImpl *node; 0126 HTMLStackElem *next; 0127 }; 0128 0129 /** 0130 * @internal 0131 * 0132 * The parser parses tokenized input into the document, building up the 0133 * document tree. If the document is wellformed, parsing it is 0134 * straightforward. 0135 * Unfortunately, people can't write wellformed HTML documents, so the parser 0136 * has to be tolerant about errors. 0137 * 0138 * We have to take care of the following error conditions: 0139 * 1. The element being added is explicitly forbidden inside some outer tag. 0140 * In this case we should close all tags up to the one, which forbids 0141 * the element, and add it afterwards. 0142 * 2. We are not allowed to add the element directly. It could be, that 0143 * the person writing the document forgot some tag inbetween (or that the 0144 * tag inbetween is optional...) This could be the case with the following 0145 * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?) 0146 * 3. We wan't to add a block element inside to an inline element. Close all 0147 * inline elements up to the next higher block element. 0148 * 4. If this doesn't help close elements, until we are allowed to add the 0149 * element or ignore the tag. 0150 * 0151 */ 0152 0153 KHTMLParser::KHTMLParser(KHTMLView *_parent, DocumentImpl *doc) 0154 { 0155 //qCDebug(KHTML_LOG) << "parser constructor"; 0156 #if SPEED_DEBUG > 0 0157 qt.start(); 0158 #endif 0159 0160 HTMLWidget = _parent; 0161 document = doc; 0162 0163 blockStack = nullptr; 0164 current = nullptr; 0165 0166 // ID_CLOSE_TAG == Num of tags 0167 forbiddenTag = new ushort[ID_CLOSE_TAG + 1]; 0168 0169 reset(); 0170 } 0171 0172 KHTMLParser::KHTMLParser(DOM::DocumentFragmentImpl *i, DocumentImpl *doc) 0173 { 0174 HTMLWidget = nullptr; 0175 document = doc; 0176 0177 forbiddenTag = new ushort[ID_CLOSE_TAG + 1]; 0178 0179 blockStack = nullptr; 0180 current = nullptr; 0181 0182 reset(); 0183 0184 setCurrent(i); 0185 0186 inBody = true; 0187 } 0188 0189 KHTMLParser::~KHTMLParser() 0190 { 0191 #if SPEED_DEBUG > 0 0192 qCDebug(KHTML_LOG) << "TIME: parsing time was = " << qt.elapsed(); 0193 #endif 0194 0195 freeBlock(); 0196 0197 if (current) { 0198 current->deref(); 0199 } 0200 0201 delete [] forbiddenTag; 0202 delete isindex; 0203 } 0204 0205 void KHTMLParser::reset() 0206 { 0207 setCurrent(document); 0208 0209 freeBlock(); 0210 0211 // before parsing no tags are forbidden... 0212 memset(forbiddenTag, 0, (ID_CLOSE_TAG + 1)*sizeof(ushort)); 0213 0214 inBody = false; 0215 haveFrameSet = false; 0216 haveContent = false; 0217 haveBody = false; 0218 haveTitle = false; 0219 inSelect = false; 0220 inStrayTableContent = 0; 0221 m_inline = false; 0222 0223 form = nullptr; 0224 map = nullptr; 0225 end = false; 0226 isindex = nullptr; 0227 0228 discard_until = 0; 0229 } 0230 0231 void KHTMLParser::parseToken(Token *t) 0232 { 0233 if (t->tid > 2 * ID_CLOSE_TAG) { 0234 // qCDebug(KHTML_LOG) << "Unknown tag!! tagID = " << t->tid; 0235 return; 0236 } 0237 if (discard_until) { 0238 if (t->tid == discard_until) { 0239 discard_until = 0; 0240 } 0241 0242 // do not skip </iframe> 0243 if (discard_until || current->id() + ID_CLOSE_TAG != t->tid) { 0244 return; 0245 } 0246 } 0247 0248 #ifdef PARSER_DEBUG 0249 qCDebug(KHTML_LOG) << "\n\n==> parser: processing token " << getParserPrintableName(t->tid) << "(" << t->tid << ")" 0250 << " current = " << getParserPrintableName(current->id()) << "(" << current->id() << ")"; 0251 qCDebug(KHTML_LOG) << "inline=" << m_inline << " inBody=" << inBody << " haveFrameSet=" << haveFrameSet << " haveContent=" << haveContent; 0252 #endif 0253 0254 // holy shit. apparently some sites use </br> instead of <br> 0255 // be compatible with IE and NS 0256 if (t->tid == ID_BR + ID_CLOSE_TAG && document->inCompatMode()) { 0257 t->tid -= ID_CLOSE_TAG; 0258 } 0259 0260 if (t->tid > ID_CLOSE_TAG) { 0261 processCloseTag(t); 0262 return; 0263 } 0264 0265 // ignore spaces, if we're not inside a paragraph or other inline code 0266 if (t->tid == ID_TEXT && t->text) { 0267 if (inBody && !skipMode() && 0268 current->id() != ID_STYLE && current->id() != ID_TITLE && 0269 current->id() != ID_SCRIPT && 0270 !t->text->containsOnlyWhitespace()) { 0271 haveContent = true; 0272 } 0273 #ifdef PARSER_DEBUG 0274 0275 qCDebug(KHTML_LOG) << "length=" << t->text->l << " text='" << QString::fromRawData(t->text->s, t->text->l) << "'"; 0276 #endif 0277 } 0278 0279 NodeImpl *n = getElement(t); 0280 // just to be sure, and to catch currently unimplemented stuff 0281 if (!n) { 0282 return; 0283 } 0284 0285 // set attributes 0286 if (n->isElementNode() && t->tid != ID_ISINDEX) { 0287 ElementImpl *e = static_cast<ElementImpl *>(n); 0288 e->setAttributeMap(t->attrs); 0289 } 0290 0291 // if this tag is forbidden inside the current context, pop 0292 // blocks until we are allowed to add it... 0293 while (blockStack && forbiddenTag[t->tid]) { 0294 #ifdef PARSER_DEBUG 0295 qCDebug(KHTML_LOG) << "t->id: " << t->tid << " is forbidden :-( "; 0296 #endif 0297 popOneBlock(); 0298 } 0299 0300 // sometimes flat doesn't make sense 0301 switch (t->tid) { 0302 case ID_SELECT: 0303 case ID_OPTION: 0304 t->flat = false; 0305 } 0306 0307 // the tokenizer needs the feedback for space discarding 0308 if (tagPriority(t->tid) == 0) { 0309 t->flat = true; 0310 } 0311 0312 if (!insertNode(n, t->flat)) { 0313 // we couldn't insert the node... 0314 #ifdef PARSER_DEBUG 0315 qCDebug(KHTML_LOG) << "insertNode failed current=" << current->id() << ", new=" << n->id() << "!"; 0316 #endif 0317 if (map == n) { 0318 #ifdef PARSER_DEBUG 0319 qCDebug(KHTML_LOG) << " --> resetting map!"; 0320 #endif 0321 map = nullptr; 0322 } 0323 if (form == n) { 0324 #ifdef PARSER_DEBUG 0325 qCDebug(KHTML_LOG) << " --> resetting form!"; 0326 #endif 0327 form = nullptr; 0328 } 0329 delete n; 0330 } 0331 } 0332 0333 void KHTMLParser::parseDoctypeToken(DoctypeToken *t) 0334 { 0335 // Ignore any doctype after the first. TODO It should be also ignored when processing DocumentFragment 0336 if (current != document || document->doctype()) { 0337 return; 0338 } 0339 0340 DocumentTypeImpl *doctype = new DocumentTypeImpl(document->implementation(), document, t->name, t->publicID, t->systemID); 0341 if (!t->internalSubset.isEmpty()) { 0342 doctype->setInternalSubset(t->internalSubset); 0343 } 0344 document->addChild(doctype); 0345 0346 // Determine parse mode here 0347 // This code more or less mimics Mozilla's implementation. 0348 // 0349 // There are three possible parse modes: 0350 // COMPAT - quirks mode emulates WinIE 0351 // and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can 0352 // be omitted from numbers. 0353 // ALMOST STRICT - This mode is identical to strict mode 0354 // except for its treatment of line-height in the inline box model. For 0355 // now (until the inline box model is re-written), this mode is identical 0356 // to STANDARDS mode. 0357 // STRICT - no quirks apply. Web pages will obey the specifications to 0358 // the letter. 0359 0360 if (!document->isHTMLDocument()) { // FIXME Could document be non-HTML? 0361 return; 0362 } 0363 DOM::HTMLDocumentImpl *htmldoc = static_cast<DOM::HTMLDocumentImpl *>(document); 0364 if (t->name.toLower() == "html") { 0365 if (!t->internalSubset.isEmpty() || t->publicID.isEmpty()) { 0366 // Internal subsets always denote full standards, as does 0367 // a doctype without a public ID. 0368 htmldoc->changeModes(DOM::DocumentImpl::Strict, DOM::DocumentImpl::Html4); 0369 } else { 0370 // We have to check a list of public IDs to see what we 0371 // should do. 0372 QString lowerPubID = t->publicID.toLower(); 0373 QByteArray pubIDStr = lowerPubID.toLocal8Bit(); 0374 0375 // Look up the entry in our gperf-generated table. 0376 const PubIDInfo *doctypeEntry = Perfect_Hash::findDoctypeEntry(pubIDStr.constData(), t->publicID.length()); 0377 if (!doctypeEntry) { 0378 // The DOCTYPE is not in the list. Assume strict mode. 0379 // ### Doesn't make any sense, but it's what Mozilla does. 0380 htmldoc->changeModes(DOM::DocumentImpl::Strict, DOM::DocumentImpl::Html4); 0381 } else { 0382 switch ((!t->systemID.isEmpty()) ? 0383 doctypeEntry->mode_if_sysid : 0384 doctypeEntry->mode_if_no_sysid) { 0385 case PubIDInfo::eQuirks3: 0386 htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html3); 0387 break; 0388 case PubIDInfo::eQuirks: 0389 htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html4); 0390 break; 0391 case PubIDInfo::eAlmostStandards: 0392 htmldoc->changeModes(DOM::DocumentImpl::Transitional, DOM::DocumentImpl::Html4); 0393 break; 0394 default: 0395 assert(!"Unknown parse mode"); 0396 } 0397 } 0398 } 0399 } else { 0400 // Malformed doctype implies quirks mode. 0401 htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html3); 0402 } 0403 } 0404 0405 static bool isTableRelatedTag(int id) 0406 { 0407 return (id == ID_TR || id == ID_TD || id == ID_TABLE || id == ID_TBODY || id == ID_TFOOT || id == ID_THEAD || 0408 id == ID_TH); 0409 } 0410 0411 bool KHTMLParser::insertNode(NodeImpl *n, bool flat) 0412 { 0413 int id = n->id(); 0414 0415 // <table> is never allowed inside stray table content. Always pop out of the stray table content 0416 // and close up the first table, and then start the second table as a sibling. 0417 if (inStrayTableContent && id == ID_TABLE) { 0418 popBlock(ID_TABLE); 0419 } 0420 0421 // let's be stupid and just try to insert it. 0422 // this should work if the document is wellformed 0423 #ifdef PARSER_DEBUG 0424 NodeImpl *tmp = current; 0425 #endif 0426 NodeImpl *newNode = current->addChild(n); 0427 if (newNode) { 0428 #ifdef PARSER_DEBUG 0429 qCDebug(KHTML_LOG) << "added " << n->nodeName().string() << " to " << tmp->nodeName().string() << ", new current=" << newNode->nodeName().string(); 0430 #endif 0431 // We allow TABLE > FORM in dtd.cpp, but do not allow the form have children in this case 0432 if (current->id() == ID_TABLE && id == ID_FORM) { 0433 flat = true; 0434 static_cast<HTMLFormElementImpl *>(n)->setMalformed(true); 0435 } 0436 0437 // don't push elements without end tag on the stack 0438 if (tagPriority(id) != 0 && !flat) { 0439 #if SPEED_DEBUG < 2 0440 if (!n->attached() && HTMLWidget) { 0441 n->attach(); 0442 } 0443 #endif 0444 if (n->isInline()) { 0445 m_inline = true; 0446 } 0447 pushBlock(id, tagPriority(id)); 0448 setCurrent(newNode); 0449 } else { 0450 #if SPEED_DEBUG < 2 0451 if (!n->attached() && HTMLWidget) { 0452 n->attach(); 0453 } 0454 if (n->maintainsState()) { 0455 document->registerMaintainsState(n); 0456 document->attemptRestoreState(n); 0457 } 0458 n->close(); 0459 #endif 0460 if (n->isInline()) { 0461 m_inline = true; 0462 } 0463 } 0464 0465 #if SPEED_DEBUG < 1 0466 if (tagPriority(id) == 0 && n->renderer()) { 0467 n->renderer()->calcMinMaxWidth(); 0468 } 0469 #endif 0470 return true; 0471 } else { 0472 #ifdef PARSER_DEBUG 0473 qCDebug(KHTML_LOG) << "ADDING NODE FAILED!!!! current = " << current->nodeName().string() << ", new = " << n->nodeName().string(); 0474 #endif 0475 // error handling... 0476 HTMLElementImpl *e; 0477 bool handled = false; 0478 0479 // first switch on current element for elements with optional end-tag and inline-only content 0480 switch (current->id()) { 0481 case ID_P: 0482 case ID_DT: 0483 if (!n->isInline()) { 0484 popBlock(current->id()); 0485 return insertNode(n); 0486 } 0487 break; 0488 case ID_TITLE: 0489 popBlock(current->id()); 0490 return insertNode(n); 0491 default: 0492 break; 0493 } 0494 0495 // switch according to the element to insert 0496 switch (id) { 0497 case ID_TR: 0498 case ID_TH: 0499 case ID_TD: 0500 if (inStrayTableContent && !isTableRelatedTag(current->id())) { 0501 // pop out to the nearest enclosing table-related tag. 0502 while (blockStack && !isTableRelatedTag(current->id())) { 0503 popOneBlock(); 0504 } 0505 return insertNode(n); 0506 } 0507 break; 0508 case ID_HEAD: 0509 // ### allow not having <HTML> in at all, as per HTML spec 0510 if (!current->isDocumentNode() && current->id() != ID_HTML) { 0511 return false; 0512 } 0513 break; 0514 case ID_COMMENT: 0515 if (head) { 0516 break; 0517 } 0518 case ID_META: 0519 case ID_LINK: 0520 case ID_ISINDEX: 0521 case ID_BASE: 0522 if (!head) { 0523 createHead(); 0524 } 0525 if (head) { 0526 if (head->addChild(n)) { 0527 #if SPEED_DEBUG < 2 0528 if (!n->attached() && HTMLWidget) { 0529 n->attach(); 0530 } 0531 #endif 0532 } 0533 0534 return true; 0535 } 0536 0537 break; 0538 case ID_HTML: 0539 if (!current->isDocumentNode()) { 0540 if (doc()->documentElement()->id() == ID_HTML) { 0541 // we have another <HTML> element.... apply attributes to existing one 0542 // make sure we don't overwrite already existing attributes 0543 NamedAttrMapImpl *map = static_cast<ElementImpl *>(n)->attributes(true); 0544 NamedAttrMapImpl *bmap = static_cast<ElementImpl *>(doc()->documentElement())->attributes(false); 0545 bool changed = false; 0546 for (unsigned long l = 0; map && l < map->length(); ++l) { 0547 NodeImpl::Id attrId = map->idAt(l); 0548 DOMStringImpl *attrValue = map->valueAt(l); 0549 changed = !bmap->getValue(attrId); 0550 bmap->setValue(attrId, attrValue); 0551 } 0552 if (changed) { 0553 doc()->recalcStyle(NodeImpl::Inherit); 0554 } 0555 } 0556 return false; 0557 } 0558 break; 0559 case ID_TITLE: 0560 case ID_STYLE: 0561 if (!head) { 0562 createHead(); 0563 } 0564 if (head) { 0565 DOM::NodeImpl *newNode = head->addChild(n); 0566 if (newNode) { 0567 pushBlock(id, tagPriority(id)); 0568 setCurrent(newNode); 0569 #if SPEED_DEBUG < 2 0570 if (!n->attached() && HTMLWidget) { 0571 n->attach(); 0572 } 0573 #endif 0574 } else { 0575 #ifdef PARSER_DEBUG 0576 qCDebug(KHTML_LOG) << "adding style before to body failed!!!!"; 0577 #endif 0578 discard_until = ID_STYLE + ID_CLOSE_TAG; 0579 return false; 0580 } 0581 return true; 0582 } else if (inBody) { 0583 discard_until = id + ID_CLOSE_TAG; 0584 return false; 0585 } 0586 break; 0587 case ID_SCRIPT: 0588 // if we failed to insert it, go into skip mode 0589 discard_until = id + ID_CLOSE_TAG; 0590 break; 0591 case ID_BODY: 0592 if (inBody && doc()->body()) { 0593 // we have another <BODY> element.... apply attributes to existing one 0594 // make sure we don't overwrite already existing attributes 0595 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor> 0596 NamedAttrMapImpl *map = static_cast<ElementImpl *>(n)->attributes(true); 0597 NamedAttrMapImpl *bmap = doc()->body()->attributes(false); 0598 bool changed = false; 0599 for (unsigned long l = 0; map && l < map->length(); ++l) { 0600 NodeImpl::Id attrId = map->idAt(l); 0601 DOMStringImpl *attrValue = map->valueAt(l); 0602 if (!bmap->getValue(attrId)) { 0603 bmap->setValue(attrId, attrValue); 0604 changed = true; 0605 } 0606 } 0607 if (changed) { 0608 doc()->recalcStyle(NodeImpl::Inherit); 0609 } 0610 } else if (current->isDocumentNode()) { 0611 break; 0612 } 0613 return false; 0614 break; 0615 0616 // the following is a hack to move non rendered elements 0617 // outside of tables. 0618 // needed for broken constructs like <table><form ...><tr>.... 0619 case ID_INPUT: { 0620 ElementImpl *e = static_cast<ElementImpl *>(n); 0621 DOMString type = e->getAttribute(ATTR_TYPE); 0622 0623 if (strcasecmp(type, "hidden") != 0) { 0624 break; 0625 } 0626 // Fall through! 0627 } 0628 case ID_TEXT: { 0629 // Don't try to fit random white-space anywhere 0630 TextImpl *t = static_cast<TextImpl *>(n); 0631 if (t->containsOnlyWhitespace()) { 0632 return false; 0633 } 0634 // ignore text inside the following elements. 0635 switch (current->id()) { 0636 case ID_SELECT: 0637 return false; 0638 default: 0639 ; 0640 // fall through!! 0641 }; 0642 break; 0643 } 0644 case ID_DL: 0645 popBlock(ID_DT); 0646 if (current->id() == ID_DL) { 0647 e = new HTMLGenericElementImpl(document, ID_DD); 0648 insertNode(e); 0649 handled = true; 0650 } 0651 break; 0652 case ID_DT: 0653 e = new HTMLDListElementImpl(document); 0654 if (insertNode(e)) { 0655 insertNode(n); 0656 return true; 0657 } 0658 break; 0659 case ID_AREA: { 0660 if (map) { 0661 map->addChild(n); 0662 #if SPEED_DEBUG < 2 0663 if (!n->attached() && HTMLWidget) { 0664 n->attach(); 0665 } 0666 #endif 0667 handled = true; 0668 return true; 0669 } else { 0670 return false; 0671 } 0672 } 0673 0674 case ID_THEAD: 0675 case ID_TBODY: 0676 case ID_TFOOT: 0677 case ID_CAPTION: 0678 case ID_COLGROUP: { 0679 if (isTableRelatedTag(current->id())) { 0680 while (blockStack && current->id() != ID_TABLE && isTableRelatedTag(current->id())) { 0681 popOneBlock(); 0682 } 0683 return insertNode(n); 0684 } 0685 } 0686 default: 0687 break; 0688 } 0689 0690 // switch on the currently active element 0691 switch (current->id()) { 0692 case ID_HTML: 0693 switch (id) { 0694 case ID_SCRIPT: 0695 case ID_STYLE: 0696 case ID_META: 0697 case ID_LINK: 0698 case ID_OBJECT: 0699 case ID_EMBED: 0700 case ID_TITLE: 0701 case ID_ISINDEX: 0702 case ID_BASE: 0703 if (!head) { 0704 head = new HTMLHeadElementImpl(document); 0705 insertNode(head.get()); 0706 handled = true; 0707 } 0708 break; 0709 case ID_TEXT: { 0710 TextImpl *t = static_cast<TextImpl *>(n); 0711 if (t->containsOnlyWhitespace()) { 0712 return false; 0713 } 0714 /* Fall through to default */ 0715 } 0716 default: 0717 if (haveFrameSet) { 0718 break; 0719 } 0720 e = new HTMLBodyElementImpl(document); 0721 startBody(); 0722 insertNode(e); 0723 handled = true; 0724 break; 0725 } 0726 break; 0727 case ID_HEAD: 0728 // we can get here only if the element is not allowed in head. 0729 if (id == ID_HTML) { 0730 return false; 0731 } else { 0732 // This means the body starts here... 0733 if (haveFrameSet) { 0734 break; 0735 } 0736 popBlock(ID_HEAD); 0737 e = new HTMLBodyElementImpl(document); 0738 startBody(); 0739 insertNode(e); 0740 handled = true; 0741 } 0742 break; 0743 case ID_BODY: 0744 break; 0745 case ID_CAPTION: 0746 // Illegal content in a caption. Close the caption and try again. 0747 popBlock(ID_CAPTION); 0748 switch (id) { 0749 case ID_THEAD: 0750 case ID_TFOOT: 0751 case ID_TBODY: 0752 case ID_TR: 0753 case ID_TD: 0754 case ID_TH: 0755 return insertNode(n, flat); 0756 } 0757 break; 0758 case ID_TABLE: 0759 case ID_THEAD: 0760 case ID_TFOOT: 0761 case ID_TBODY: 0762 case ID_TR: 0763 switch (id) { 0764 case ID_TABLE: 0765 popBlock(ID_TABLE); // end the table 0766 handled = checkChild(current->id(), id, doc()->inStrictMode()); 0767 break; 0768 default: { 0769 NodeImpl *node = current; 0770 NodeImpl *parent = node->parentNode(); 0771 // A script may have removed the current node's parent from the DOM 0772 // http://bugzilla.opendarwin.org/show_bug.cgi?id=7137 0773 // FIXME: we should do real recovery here and re-parent with the correct node. 0774 if (!parent) { 0775 return false; 0776 } 0777 NodeImpl *parentparent = parent->parentNode(); 0778 0779 if (n->isTextNode() || 0780 (node->id() == ID_TR && 0781 (parent->id() == ID_THEAD || 0782 parent->id() == ID_TBODY || 0783 parent->id() == ID_TFOOT) && parentparent->id() == ID_TABLE) || 0784 (!checkChild(ID_TR, id) && (node->id() == ID_THEAD || node->id() == ID_TBODY || node->id() == ID_TFOOT) && 0785 parent->id() == ID_TABLE)) { 0786 node = (node->id() == ID_TABLE) ? node : 0787 ((node->id() == ID_TR) ? parentparent : parent); 0788 NodeImpl *parent = node->parentNode(); 0789 if (!parent) { 0790 return false; 0791 } 0792 int exceptioncode = 0; 0793 #ifdef PARSER_DEBUG 0794 qCDebug(KHTML_LOG) << "calling insertBefore(" << n->nodeName().string() << "," << node->nodeName().string() << ")"; 0795 #endif 0796 parent->insertBefore(n, node, exceptioncode); 0797 if (exceptioncode) { 0798 #ifndef PARSER_DEBUG 0799 if (!n->isTextNode()) 0800 #endif 0801 // qCDebug(KHTML_LOG) << "adding content before table failed.."; 0802 break; 0803 } 0804 if (n->isElementNode() && tagPriority(id) != 0 && 0805 !flat && endTagRequirement(id) != DOM::FORBIDDEN) { 0806 0807 pushBlock(id, tagPriority(id)); 0808 setCurrent(n); 0809 inStrayTableContent++; 0810 blockStack->strayTableContent = true; 0811 } 0812 return true; 0813 } 0814 0815 if (current->id() == ID_TR) { 0816 e = new HTMLTableCellElementImpl(document, ID_TD); 0817 } else if (current->id() == ID_TABLE) { 0818 e = new HTMLTableSectionElementImpl(document, ID_TBODY, true /* implicit */); 0819 } else { 0820 e = new HTMLTableRowElementImpl(document); 0821 } 0822 0823 insertNode(e); 0824 handled = true; 0825 break; 0826 } // end default 0827 } // end switch 0828 break; 0829 case ID_OBJECT: 0830 discard_until = id + ID_CLOSE_TAG; 0831 return false; 0832 case ID_UL: 0833 case ID_OL: 0834 case ID_DIR: 0835 case ID_MENU: 0836 e = new HTMLLIElementImpl(document); 0837 e->addCSSProperty(CSS_PROP_LIST_STYLE_TYPE, CSS_VAL_NONE); 0838 insertNode(e); 0839 handled = true; 0840 break; 0841 case ID_FORM: 0842 popBlock(ID_FORM); 0843 handled = true; 0844 break; 0845 case ID_SELECT: 0846 if (n->isInline()) { 0847 return false; 0848 } 0849 break; 0850 case ID_P: 0851 case ID_H1: 0852 case ID_H2: 0853 case ID_H3: 0854 case ID_H4: 0855 case ID_H5: 0856 case ID_H6: 0857 if (!n->isInline()) { 0858 popBlock(current->id()); 0859 handled = true; 0860 } 0861 break; 0862 case ID_OPTION: 0863 case ID_OPTGROUP: 0864 if (id == ID_OPTGROUP) { 0865 popBlock(current->id()); 0866 handled = true; 0867 } else if (id == ID_SELECT) { 0868 // IE treats a nested select as </select>. Let's do the same 0869 popBlock(ID_SELECT); 0870 break; 0871 } 0872 break; 0873 // head elements in the body should be ignored. 0874 0875 case ID_ADDRESS: 0876 case ID_COLGROUP: 0877 case ID_FONT: 0878 popBlock(current->id()); 0879 handled = true; 0880 break; 0881 default: 0882 if (current->isDocumentNode()) { 0883 DocumentImpl *doc = static_cast<DocumentImpl *>(current); 0884 if (!doc->documentElement()) { 0885 e = new HTMLHtmlElementImpl(document); 0886 insertNode(e); 0887 handled = true; 0888 } 0889 } else if (current->isInline()) { 0890 popInlineBlocks(); 0891 handled = true; 0892 } 0893 } 0894 0895 // if we couldn't handle the error, just rethrow the exception... 0896 if (!handled) { 0897 //qCDebug(KHTML_LOG) << "Exception handler failed in HTMLPArser::insertNode()"; 0898 return false; 0899 } 0900 0901 return insertNode(n); 0902 } 0903 } 0904 0905 NodeImpl *KHTMLParser::getElement(Token *t) 0906 { 0907 NodeImpl *n = nullptr; 0908 0909 switch (t->tid) { 0910 case ID_HTML: 0911 n = new HTMLHtmlElementImpl(document); 0912 break; 0913 case ID_HEAD: 0914 if (!head && (current->id() == ID_HTML || current->isDocumentNode())) { 0915 head = new HTMLHeadElementImpl(document); 0916 n = head.get(); 0917 } 0918 break; 0919 case ID_BODY: 0920 // body no longer allowed if we have a frameset 0921 if (haveFrameSet) { 0922 break; 0923 } 0924 popBlock(ID_HEAD); 0925 n = new HTMLBodyElementImpl(document); 0926 haveBody = true; 0927 startBody(); 0928 break; 0929 0930 // head elements 0931 case ID_BASE: 0932 n = new HTMLBaseElementImpl(document); 0933 break; 0934 case ID_LINK: 0935 n = new HTMLLinkElementImpl(document); 0936 break; 0937 case ID_META: 0938 n = new HTMLMetaElementImpl(document); 0939 break; 0940 case ID_STYLE: 0941 n = new HTMLStyleElementImpl(document); 0942 break; 0943 case ID_TITLE: 0944 // only one non-empty <title> allowed 0945 if (haveTitle) { 0946 discard_until = ID_TITLE + ID_CLOSE_TAG; 0947 break; 0948 } 0949 n = new HTMLTitleElementImpl(document); 0950 // we'll set haveTitle when closing the tag 0951 break; 0952 0953 // frames 0954 case ID_FRAME: 0955 n = new HTMLFrameElementImpl(document); 0956 break; 0957 case ID_FRAMESET: 0958 popBlock(ID_HEAD); 0959 if (inBody && !haveFrameSet && !haveContent && !haveBody) { 0960 popBlock(ID_BODY); 0961 // ### actually for IE document.body returns the now hidden "body" element 0962 // we can't implement that behavior now because it could cause too many 0963 // regressions and the headaches are not worth the work as long as there is 0964 // no site actually relying on that detail (Dirk) 0965 if (static_cast<HTMLDocumentImpl *>(document)->body()) 0966 static_cast<HTMLDocumentImpl *>(document)->body() 0967 ->addCSSProperty(CSS_PROP_DISPLAY, CSS_VAL_NONE); 0968 inBody = false; 0969 } 0970 if ((haveBody || haveContent || haveFrameSet) && current->id() == ID_HTML) { 0971 break; 0972 } 0973 n = new HTMLFrameSetElementImpl(document); 0974 haveFrameSet = true; 0975 startBody(); 0976 break; 0977 // a bit a special case, since the frame is inlined... 0978 case ID_IFRAME: 0979 n = new HTMLIFrameElementImpl(document); 0980 break; 0981 0982 // form elements 0983 case ID_FORM: 0984 // thou shall not nest <form> - NS/IE quirk 0985 if (form) { 0986 break; 0987 } 0988 n = form = new HTMLFormElementImpl(document, false); 0989 break; 0990 case ID_BUTTON: 0991 n = new HTMLButtonElementImpl(document, form); 0992 break; 0993 case ID_FIELDSET: 0994 n = new HTMLFieldSetElementImpl(document, form); 0995 break; 0996 case ID_INPUT: 0997 if (t->attrs && 0998 KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled() && 0999 KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled() && 1000 !strcasecmp(t->attrs->getValue(ATTR_TYPE), "image")) { 1001 const QString url = doc()->completeURL(DOMString(t->attrs->getValue(ATTR_SRC)).trimSpaces().string()); 1002 if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered(url)) { 1003 return nullptr; 1004 } 1005 } 1006 n = new HTMLInputElementImpl(document, form); 1007 break; 1008 case ID_ISINDEX: 1009 n = handleIsindex(t); 1010 if (!inBody) { 1011 isindex = n; 1012 n = nullptr; 1013 } else { 1014 t->flat = true; 1015 } 1016 break; 1017 case ID_KEYGEN: 1018 n = new HTMLKeygenElementImpl(document, form); 1019 break; 1020 case ID_LABEL: 1021 n = new HTMLLabelElementImpl(document); 1022 break; 1023 case ID_LEGEND: 1024 n = new HTMLLegendElementImpl(document, form); 1025 break; 1026 case ID_OPTGROUP: 1027 n = new HTMLOptGroupElementImpl(document, form); 1028 break; 1029 case ID_OPTION: 1030 popOptionalBlock(ID_OPTION); 1031 n = new HTMLOptionElementImpl(document, form); 1032 break; 1033 case ID_SELECT: 1034 inSelect = true; 1035 n = new HTMLSelectElementImpl(document, form); 1036 break; 1037 case ID_TEXTAREA: 1038 n = new HTMLTextAreaElementImpl(document, form); 1039 break; 1040 1041 // lists 1042 case ID_DL: 1043 n = new HTMLDListElementImpl(document); 1044 break; 1045 case ID_DD: 1046 popOptionalBlock(ID_DT); 1047 popOptionalBlock(ID_DD); 1048 n = new HTMLGenericElementImpl(document, t->tid); 1049 break; 1050 case ID_DT: 1051 popOptionalBlock(ID_DD); 1052 popOptionalBlock(ID_DT); 1053 n = new HTMLGenericElementImpl(document, t->tid); 1054 break; 1055 case ID_UL: { 1056 n = new HTMLUListElementImpl(document); 1057 break; 1058 } 1059 case ID_OL: { 1060 n = new HTMLOListElementImpl(document); 1061 break; 1062 } 1063 case ID_DIR: 1064 n = new HTMLDirectoryElementImpl(document); 1065 break; 1066 case ID_MENU: 1067 n = new HTMLMenuElementImpl(document); 1068 break; 1069 case ID_LI: 1070 popOptionalBlock(ID_LI); 1071 n = new HTMLLIElementImpl(document); 1072 break; 1073 // formatting elements (block) 1074 case ID_BLOCKQUOTE: 1075 n = new HTMLGenericElementImpl(document, t->tid); 1076 break; 1077 case ID_LAYER: 1078 case ID_ILAYER: 1079 n = new HTMLLayerElementImpl(document, t->tid); 1080 break; 1081 case ID_P: 1082 case ID_DIV: 1083 n = new HTMLDivElementImpl(document, t->tid); 1084 break; 1085 case ID_H1: 1086 case ID_H2: 1087 case ID_H3: 1088 case ID_H4: 1089 case ID_H5: 1090 case ID_H6: 1091 n = new HTMLGenericElementImpl(document, t->tid); 1092 break; 1093 case ID_HR: 1094 n = new HTMLHRElementImpl(document); 1095 break; 1096 case ID_PRE: 1097 case ID_XMP: 1098 case ID_PLAINTEXT: 1099 case ID_LISTING: 1100 n = new HTMLPreElementImpl(document, t->tid); 1101 break; 1102 1103 // font stuff 1104 case ID_BASEFONT: 1105 n = new HTMLBaseFontElementImpl(document); 1106 break; 1107 case ID_FONT: 1108 n = new HTMLFontElementImpl(document); 1109 break; 1110 1111 // ins/del 1112 case ID_DEL: 1113 case ID_INS: 1114 n = new HTMLGenericElementImpl(document, t->tid); 1115 break; 1116 1117 // anchor 1118 case ID_A: 1119 popBlock(ID_A); 1120 1121 n = new HTMLAnchorElementImpl(document); 1122 break; 1123 1124 // images 1125 case ID_IMAGE: 1126 case ID_IMG: 1127 if (t->attrs && 1128 KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled() && 1129 KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled()) { 1130 const QString url = doc()->completeURL(DOMString(t->attrs->getValue(ATTR_SRC)).trimSpaces().string()); 1131 if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered(url)) { 1132 return nullptr; 1133 } 1134 } 1135 n = new HTMLImageElementImpl(document, form); 1136 break; 1137 1138 case ID_CANVAS: 1139 n = new HTMLCanvasElementImpl(document); 1140 break; 1141 1142 case ID_MAP: 1143 map = new HTMLMapElementImpl(document); 1144 n = map; 1145 break; 1146 case ID_AREA: 1147 n = new HTMLAreaElementImpl(document); 1148 break; 1149 1150 // objects, applets and scripts 1151 case ID_APPLET: 1152 n = new HTMLAppletElementImpl(document); 1153 break; 1154 case ID_EMBED: 1155 n = new HTMLEmbedElementImpl(document); 1156 break; 1157 case ID_OBJECT: 1158 n = new HTMLObjectElementImpl(document); 1159 break; 1160 case ID_PARAM: 1161 n = new HTMLParamElementImpl(document); 1162 break; 1163 case ID_SCRIPT: { 1164 HTMLScriptElementImpl *scriptElement = new HTMLScriptElementImpl(document); 1165 scriptElement->setCreatedByParser(true); 1166 n = scriptElement; 1167 break; 1168 } 1169 1170 // media 1171 case ID_AUDIO: 1172 n = new HTMLAudioElement(document); 1173 break; 1174 case ID_VIDEO: 1175 n = new HTMLVideoElement(document); 1176 break; 1177 case ID_SOURCE: 1178 n = new HTMLSourceElement(document); 1179 break; 1180 1181 // tables 1182 case ID_TABLE: 1183 n = new HTMLTableElementImpl(document); 1184 break; 1185 case ID_CAPTION: 1186 n = new HTMLTableCaptionElementImpl(document); 1187 break; 1188 case ID_COLGROUP: 1189 case ID_COL: 1190 n = new HTMLTableColElementImpl(document, t->tid); 1191 break; 1192 case ID_TR: 1193 popBlock(ID_TR); 1194 n = new HTMLTableRowElementImpl(document); 1195 break; 1196 case ID_TD: 1197 case ID_TH: 1198 popBlock(ID_TH); 1199 popBlock(ID_TD); 1200 n = new HTMLTableCellElementImpl(document, t->tid); 1201 break; 1202 case ID_TBODY: 1203 case ID_THEAD: 1204 case ID_TFOOT: 1205 popBlock(ID_THEAD); 1206 popBlock(ID_TBODY); 1207 popBlock(ID_TFOOT); 1208 n = new HTMLTableSectionElementImpl(document, t->tid, false); 1209 break; 1210 1211 // inline elements 1212 case ID_BR: 1213 n = new HTMLBRElementImpl(document); 1214 break; 1215 case ID_Q: 1216 n = new HTMLGenericElementImpl(document, t->tid); 1217 break; 1218 1219 // elements with no special representation in the DOM 1220 1221 // block: 1222 case ID_ADDRESS: 1223 case ID_CENTER: 1224 n = new HTMLGenericElementImpl(document, t->tid); 1225 break; 1226 // inline 1227 // %fontstyle 1228 case ID_TT: 1229 case ID_U: 1230 case ID_B: 1231 case ID_I: 1232 case ID_S: 1233 case ID_STRIKE: 1234 case ID_BIG: 1235 case ID_SMALL: 1236 1237 // %phrase 1238 case ID_EM: 1239 case ID_STRONG: 1240 case ID_DFN: 1241 case ID_CODE: 1242 case ID_SAMP: 1243 case ID_KBD: 1244 case ID_VAR: 1245 case ID_CITE: 1246 case ID_ABBR: 1247 case ID_ACRONYM: 1248 1249 // %special 1250 case ID_SUB: 1251 case ID_SUP: 1252 case ID_SPAN: 1253 case ID_WBR: 1254 case ID_NOBR: 1255 if (t->tid == ID_NOBR || t->tid == ID_WBR) { 1256 popOptionalBlock(t->tid); 1257 } 1258 case ID_BDO: 1259 n = new HTMLGenericElementImpl(document, t->tid); 1260 break; 1261 1262 // these are special, and normally not rendered 1263 case ID_NOEMBED: 1264 if (!t->flat) { 1265 n = new HTMLGenericElementImpl(document, t->tid); 1266 discard_until = ID_NOEMBED + ID_CLOSE_TAG; 1267 } 1268 return n; 1269 case ID_NOFRAMES: 1270 if (!t->flat) { 1271 n = new HTMLGenericElementImpl(document, t->tid); 1272 discard_until = ID_NOFRAMES + ID_CLOSE_TAG; 1273 } 1274 return n; 1275 case ID_NOSCRIPT: 1276 if (!t->flat) { 1277 n = new HTMLGenericElementImpl(document, t->tid); 1278 if (HTMLWidget && HTMLWidget->part()->jScriptEnabled()) { 1279 discard_until = ID_NOSCRIPT + ID_CLOSE_TAG; 1280 } 1281 } 1282 return n; 1283 case ID_NOLAYER: 1284 // discard_until = ID_NOLAYER + ID_CLOSE_TAG; 1285 return nullptr; 1286 break; 1287 case ID_MARQUEE: 1288 n = new HTMLMarqueeElementImpl(document); 1289 break; 1290 // text 1291 case ID_TEXT: 1292 // qCDebug(KHTML_LOG) << "ID_TEXT: \"" << DOMString(t->text).string() << "\""; 1293 n = new TextImpl(document, t->text); 1294 break; 1295 case ID_COMMENT: 1296 n = new CommentImpl(document, t->text); 1297 break; 1298 default: 1299 n = new HTMLGenericElementImpl(document, t->tid); 1300 break; 1301 // qCDebug(KHTML_LOG) << "Unknown tag " << t->tid << "!"; 1302 } 1303 return n; 1304 } 1305 1306 void KHTMLParser::processCloseTag(Token *t) 1307 { 1308 // FIXME: the below only behaves according to "in body" insertion mode (HTML5 8.2.5.10) 1309 // - might need fixing when we have other insertion modes. 1310 switch (t->tid) { 1311 case ID_HTML+ID_CLOSE_TAG: 1312 case ID_BODY+ID_CLOSE_TAG: 1313 // we never trust those close tags, since stupid webpages close 1314 // them prematurely 1315 return; 1316 case ID_FORM+ID_CLOSE_TAG: // needs additional error checking. See spec. 1317 form = nullptr; 1318 if (!isElementInScope(ID_FORM)) { 1319 // Parse error. Ignore. 1320 return; 1321 } 1322 // this one is to get the right style on the body element 1323 break; 1324 case ID_MAP+ID_CLOSE_TAG: 1325 map = nullptr; 1326 break; 1327 case ID_SELECT+ID_CLOSE_TAG: 1328 inSelect = false; 1329 break; 1330 case ID_TITLE+ID_CLOSE_TAG: 1331 // Set haveTitle only if <title> isn't empty 1332 if (current->firstChild()) { 1333 haveTitle = true; 1334 } 1335 break; 1336 case ID_P+ID_CLOSE_TAG: 1337 if (!isElementInScope(ID_P)) { 1338 // Parse error. Handle as if <p> had been seen. 1339 t->tid = ID_P; 1340 parseToken(t); 1341 popBlock(ID_P); 1342 return; 1343 } 1344 break; 1345 case ID_ADDRESS+ID_CLOSE_TAG: 1346 // case ID_ARTICLE+ID_CLOSE_TAG: 1347 case ID_BLOCKQUOTE+ID_CLOSE_TAG: 1348 case ID_CENTER+ID_CLOSE_TAG: 1349 // case ID_DATAGRID+ID_CLOSE_TAG: 1350 // case ID_DETAILS+ID_CLOSE_TAG: 1351 // case ID_DIALOG+ID_CLOSE_TAG: 1352 case ID_DIR+ID_CLOSE_TAG: 1353 case ID_DIV+ID_CLOSE_TAG: 1354 case ID_DL+ID_CLOSE_TAG: 1355 case ID_FIELDSET+ID_CLOSE_TAG: 1356 // case ID_FIGURE+ID_CLOSE_TAG: 1357 // case ID_FOOTER+ID_CLOSE_TAG: 1358 // case ID_HEADER+ID_CLOSE_TAG: 1359 case ID_LISTING+ID_CLOSE_TAG: 1360 case ID_MENU+ID_CLOSE_TAG: 1361 // case ID_NAV+ID_CLOSE_TAG: 1362 case ID_OL+ID_CLOSE_TAG: 1363 case ID_PRE+ID_CLOSE_TAG: 1364 // case ID_SECTION+ID_CLOSE_TAG: 1365 case ID_UL+ID_CLOSE_TAG: 1366 1367 case ID_DD+ID_CLOSE_TAG: 1368 case ID_DT+ID_CLOSE_TAG: 1369 case ID_LI+ID_CLOSE_TAG: 1370 1371 case ID_APPLET+ID_CLOSE_TAG: // those four should also "Clear the list of active formatting elements 1372 case ID_BUTTON+ID_CLOSE_TAG: // up to the last marker." whenever we implement adoption agency. 1373 case ID_MARQUEE+ID_CLOSE_TAG: 1374 case ID_OBJECT+ID_CLOSE_TAG: 1375 1376 case ID_HEAD+ID_CLOSE_TAG: // ### according to HTML5, should be treated as 'Any other end tag' 1377 // We'll do that when proper 'Any other end tag' handling is implemented. 1378 // In the meantime, test scoping at least (#170694) 1379 1380 if (!isElementInScope(t->tid - ID_CLOSE_TAG)) { 1381 // Parse error. Ignore token. 1382 return; 1383 } 1384 break; 1385 case ID_H1: 1386 case ID_H2: 1387 case ID_H3: 1388 case ID_H4: 1389 case ID_H5: 1390 case ID_H6: 1391 if (!isHeadingInScope()) { 1392 // Parse error. Ignore token. 1393 return; 1394 } 1395 break; 1396 case ID_A: // Formatting elements - will need special handling - cf. HTML5 "adoption agency algorithm" 1397 case ID_B: // meant to replace the "residual style" handling we have now. 1398 case ID_BIG: 1399 case ID_CODE: 1400 case ID_EM: 1401 case ID_FONT: 1402 case ID_I: 1403 case ID_NOBR: 1404 case ID_S: 1405 case ID_SMALL: 1406 case ID_STRIKE: 1407 case ID_STRONG: 1408 case ID_TT: 1409 case ID_U: 1410 break; 1411 1412 default: 1413 // otherTag = true; // FIXME: implement 'Any other end tag' handling 1414 break; 1415 } 1416 1417 #ifdef PARSER_DEBUG 1418 qCDebug(KHTML_LOG) << "added the following children to " << current->nodeName().string(); 1419 NodeImpl *child = current->firstChild(); 1420 while (child != 0) { 1421 qCDebug(KHTML_LOG) << " " << child->nodeName().string(); 1422 child = child->nextSibling(); 1423 } 1424 #endif 1425 1426 generateImpliedEndTags(t->tid - ID_CLOSE_TAG); 1427 popBlock(t->tid - ID_CLOSE_TAG); 1428 1429 #ifdef PARSER_DEBUG 1430 qCDebug(KHTML_LOG) << "closeTag --> current = " << current->nodeName().string(); 1431 #endif 1432 } 1433 1434 bool KHTMLParser::isResidualStyleTag(int _id) 1435 { 1436 switch (_id) { 1437 case ID_A: 1438 case ID_B: 1439 case ID_BIG: 1440 case ID_EM: 1441 case ID_FONT: 1442 case ID_I: 1443 case ID_NOBR: 1444 case ID_S: 1445 case ID_SMALL: 1446 case ID_STRIKE: 1447 case ID_STRONG: 1448 case ID_TT: 1449 case ID_U: 1450 case ID_DFN: 1451 case ID_CODE: 1452 case ID_SAMP: 1453 case ID_KBD: 1454 case ID_VAR: 1455 case ID_DEL: 1456 case ID_INS: 1457 return true; 1458 default: 1459 return false; 1460 } 1461 } 1462 1463 bool KHTMLParser::isAffectedByResidualStyle(int _id) 1464 { 1465 if (isResidualStyleTag(_id)) { 1466 return true; 1467 } 1468 1469 switch (_id) { 1470 case ID_P: 1471 case ID_DIV: 1472 case ID_BLOCKQUOTE: 1473 case ID_ADDRESS: 1474 case ID_H1: 1475 case ID_H2: 1476 case ID_H3: 1477 case ID_H4: 1478 case ID_H5: 1479 case ID_H6: 1480 case ID_CENTER: 1481 case ID_UL: 1482 case ID_OL: 1483 case ID_LI: 1484 case ID_DL: 1485 case ID_DT: 1486 case ID_DD: 1487 case ID_PRE: 1488 case ID_LISTING: 1489 return true; 1490 default: 1491 return false; 1492 } 1493 } 1494 1495 void KHTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem *elem) 1496 { 1497 // Find the element that crosses over to a higher level. 1498 // ### For now, if there is more than one, we will only make sure we close the residual style. 1499 int exceptionCode = 0; 1500 HTMLStackElem *curr = blockStack; 1501 HTMLStackElem *maxElem = nullptr; 1502 HTMLStackElem *endElem = nullptr; 1503 HTMLStackElem *prev = nullptr; 1504 HTMLStackElem *prevMaxElem = nullptr; 1505 bool advancedResidual = false; // ### if set we only close the residual style 1506 while (curr && curr != elem) { 1507 if (curr->level > elem->level) { 1508 if (!isAffectedByResidualStyle(curr->id)) { 1509 return; 1510 } 1511 if (maxElem) { 1512 advancedResidual = true; 1513 } else { 1514 endElem = curr; 1515 } 1516 maxElem = curr; 1517 prevMaxElem = prev; 1518 } 1519 1520 prev = curr; 1521 curr = curr->next; 1522 } 1523 1524 if (!curr || !maxElem) { 1525 return; 1526 } 1527 1528 NodeImpl *residualElem = prev->node; 1529 NodeImpl *blockElem = prevMaxElem ? prevMaxElem->node : current; 1530 RefPtr<NodeImpl> parentElem = elem->node; 1531 1532 // Check to see if the reparenting that is going to occur is allowed according to the DOM. 1533 // FIXME: We should either always allow it or perform an additional fixup instead of 1534 // just bailing here. 1535 // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now. 1536 if (!parentElem->childAllowed(blockElem)) { 1537 return; 1538 } 1539 1540 if (maxElem->node->parentNode() != elem->node && !advancedResidual) { 1541 // Walk the stack and remove any elements that aren't residual style tags. These 1542 // are basically just being closed up. Example: 1543 // <font><span>Moo<p>Goo</font></p>. 1544 // In the above example, the <span> doesn't need to be reopened. It can just close. 1545 HTMLStackElem *currElem = maxElem->next; 1546 HTMLStackElem *prevElem = maxElem; 1547 while (currElem != elem) { 1548 HTMLStackElem *nextElem = currElem->next; 1549 if (!isResidualStyleTag(currElem->id)) { 1550 prevElem->next = nextElem; 1551 prevElem->setNode(currElem->node); 1552 delete currElem; 1553 } else { 1554 prevElem = currElem; 1555 } 1556 currElem = nextElem; 1557 } 1558 1559 // We have to reopen residual tags in between maxElem and elem. An example of this case s: 1560 // <font><i>Moo<p>Foo</font>. 1561 // In this case, we need to transform the part before the <p> into: 1562 // <font><i>Moo</i></font><i> 1563 // so that the <i> will remain open. This involves the modification of elements 1564 // in the block stack. 1565 // This will also affect how we ultimately reparent the block, since we want it to end up 1566 // under the reopened residual tags (e.g., the <i> in the above example.) 1567 RefPtr<NodeImpl> prevNode = nullptr; 1568 RefPtr<NodeImpl> currNode = nullptr; 1569 currElem = maxElem; 1570 while (currElem->node != residualElem) { 1571 if (isResidualStyleTag(currElem->node->id())) { 1572 // Create a clone of this element. 1573 currNode = currElem->node->cloneNode(false); 1574 currElem->node->close(); 1575 removeForbidden(currElem->id, forbiddenTag); 1576 1577 // Change the stack element's node to point to the clone. 1578 currElem->setNode(currNode.get()); 1579 1580 // Attach the previous node as a child of this new node. 1581 if (prevNode) { 1582 currNode->appendChild(prevNode.get(), exceptionCode); 1583 } else { // The new parent for the block element is going to be the innermost clone. 1584 parentElem = currNode; 1585 } 1586 1587 prevNode = currNode; 1588 } 1589 1590 currElem = currElem->next; 1591 } 1592 1593 // Now append the chain of new residual style elements if one exists. 1594 if (prevNode) { 1595 elem->node->appendChild(prevNode.get(), exceptionCode); 1596 } 1597 } 1598 1599 // We need to make a clone of |residualElem| and place it just inside |blockElem|. 1600 // All content of |blockElem| is reparented to be under this clone. We then 1601 // reparent |blockElem| using real DOM calls so that attachment/detachment will 1602 // be performed to fix up the rendering tree. 1603 // So for this example: <b>...<p>Foo</b>Goo</p> 1604 // The end result will be: <b>...</b><p><b>Foo</b>Goo</p> 1605 // 1606 // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids. 1607 SharedPtr<NodeImpl> guard(blockElem); 1608 blockElem->parentNode()->removeChild(blockElem, exceptionCode); 1609 1610 if (!advancedResidual) { 1611 // Step 2: Clone |residualElem|. 1612 RefPtr<NodeImpl> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids. 1613 1614 // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem| 1615 // before we've put |newElem| into the document. That way we'll only do one attachment of all 1616 // the new content (instead of a bunch of individual attachments). 1617 NodeImpl *currNode = blockElem->firstChild(); 1618 while (currNode) { 1619 NodeImpl *nextNode = currNode->nextSibling(); 1620 SharedPtr<NodeImpl> guard(currNode); //Protect from deletion while moving 1621 blockElem->removeChild(currNode, exceptionCode); 1622 newNode->appendChild(currNode, exceptionCode); 1623 currNode = nextNode; 1624 1625 // TODO - To be replaced. 1626 // Re-register form elements with currently active form, step 1 will have removed them 1627 if (form && currNode && currNode->isGenericFormElement()) { 1628 HTMLGenericFormElementImpl *e = static_cast<HTMLGenericFormElementImpl *>(currNode); 1629 form->registerFormElement(e); 1630 } 1631 } 1632 1633 // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no 1634 // attachment can occur yet. 1635 blockElem->appendChild(newNode.get(), exceptionCode); 1636 } 1637 1638 // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place. 1639 parentElem->appendChild(blockElem, exceptionCode); 1640 1641 // Step 6: Elide |elem|, since it is effectively no longer open. Also update 1642 // the node associated with the previous stack element so that when it gets popped, 1643 // it doesn't make the residual element the next current node. 1644 HTMLStackElem *currElem = maxElem; 1645 HTMLStackElem *prevElem = nullptr; 1646 while (currElem != elem) { 1647 prevElem = currElem; 1648 currElem = currElem->next; 1649 } 1650 prevElem->next = elem->next; 1651 prevElem->setNode(elem->node); 1652 delete elem; 1653 1654 // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>. 1655 // In the above example, Goo should stay italic. 1656 curr = blockStack; 1657 HTMLStackElem *residualStyleStack = nullptr; 1658 while (curr && curr != endElem) { 1659 // We will actually schedule this tag for reopening 1660 // after we complete the close of this entire block. 1661 NodeImpl *currNode = current; 1662 if (isResidualStyleTag(curr->id)) { 1663 // We've overloaded the use of stack elements and are just reusing the 1664 // struct with a slightly different meaning to the variables. Instead of chaining 1665 // from innermost to outermost, we build up a list of all the tags we need to reopen 1666 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing 1667 // to the outermost tag we need to reopen. 1668 // We also set curr->node to be the actual element that corresponds to the ID stored in 1669 // curr->id rather than the node that you should pop to when the element gets pulled off 1670 // the stack. 1671 popOneBlock(false); 1672 curr->setNode(currNode); 1673 curr->next = residualStyleStack; 1674 residualStyleStack = curr; 1675 } else { 1676 popOneBlock(); 1677 } 1678 1679 curr = blockStack; 1680 } 1681 1682 reopenResidualStyleTags(residualStyleStack, nullptr); // FIXME: Deal with stray table content some day 1683 // if it becomes necessary to do so. 1684 } 1685 1686 void KHTMLParser::reopenResidualStyleTags(HTMLStackElem *elem, DOM::NodeImpl *malformedTableParent) 1687 { 1688 // Loop for each tag that needs to be reopened. 1689 while (elem) { 1690 // Create a shallow clone of the DOM node for this element. 1691 RefPtr<NodeImpl> newNode = elem->node->cloneNode(false); 1692 1693 // Append the new node. In the malformed table case, we need to insert before the table, 1694 // which will be the last child. 1695 int exceptionCode = 0; 1696 if (malformedTableParent) { 1697 malformedTableParent->insertBefore(newNode.get(), malformedTableParent->lastChild(), exceptionCode); 1698 } else { 1699 current->appendChild(newNode.get(), exceptionCode); 1700 } 1701 // FIXME: Is it really OK to ignore the exceptions here? 1702 1703 // Now push a new stack element for this node we just created. 1704 pushBlock(elem->id, elem->level); 1705 1706 // Set our strayTableContent boolean if needed, so that the reopened tag also knows 1707 // that it is inside a malformed table. 1708 blockStack->strayTableContent = malformedTableParent != nullptr; 1709 if (blockStack->strayTableContent) { 1710 inStrayTableContent++; 1711 } 1712 1713 // Clear our malformed table parent variable. 1714 malformedTableParent = nullptr; 1715 1716 // Update |current| manually to point to the new node. 1717 setCurrent(newNode.get()); 1718 1719 // Advance to the next tag that needs to be reopened. 1720 HTMLStackElem *next = elem->next; 1721 delete elem; 1722 elem = next; 1723 } 1724 } 1725 1726 void KHTMLParser::pushBlock(int _id, int _level) 1727 { 1728 HTMLStackElem *Elem = new HTMLStackElem(_id, _level, current, m_inline, blockStack); 1729 1730 blockStack = Elem; 1731 addForbidden(_id, forbiddenTag); 1732 } 1733 1734 void KHTMLParser::generateImpliedEndTags(int _id) 1735 { 1736 HTMLStackElem *Elem = blockStack; 1737 1738 int level = tagPriority(_id); 1739 while (Elem && Elem->id != _id) { 1740 HTMLStackElem *NextElem = Elem->next; 1741 if (endTagRequirement(Elem->id) == DOM::OPTIONAL && Elem->level <= level) { 1742 popOneBlock(); 1743 } else { 1744 break; 1745 } 1746 Elem = NextElem; 1747 } 1748 } 1749 1750 void KHTMLParser::popOptionalBlock(int _id) 1751 { 1752 bool found = false; 1753 HTMLStackElem *Elem = blockStack; 1754 1755 int level = tagPriority(_id); 1756 while (Elem) { 1757 if (Elem->id == _id) { 1758 found = true; 1759 break; 1760 } 1761 if (Elem->level > level || (endTagRequirement(Elem->id) != DOM::OPTIONAL && !isResidualStyleTag(Elem->id))) { 1762 break; 1763 } 1764 Elem = Elem->next; 1765 } 1766 1767 if (found) { 1768 generateImpliedEndTags(_id); 1769 popBlock(_id); 1770 } 1771 } 1772 1773 bool KHTMLParser::isElementInScope(int _id) 1774 { 1775 // HTML5 8.2.3.2 1776 HTMLStackElem *Elem = blockStack; 1777 while (Elem && Elem->id != _id) { 1778 if (DOM::checkIsScopeBoundary(Elem->id)) { 1779 return false; 1780 } 1781 Elem = Elem->next; 1782 } 1783 return Elem; 1784 } 1785 1786 bool KHTMLParser::isHeadingInScope() 1787 { 1788 HTMLStackElem *Elem = blockStack; 1789 while (Elem && (Elem->id < ID_H1 || Elem->id > ID_H6)) { 1790 if (DOM::checkIsScopeBoundary(Elem->id)) { 1791 return false; 1792 } 1793 Elem = Elem->next; 1794 } 1795 return Elem; 1796 } 1797 1798 void KHTMLParser::popBlock(int _id) 1799 { 1800 HTMLStackElem *Elem = blockStack; 1801 int maxLevel = 0; 1802 1803 #ifdef PARSER_DEBUG 1804 qCDebug(KHTML_LOG) << "popBlock(" << getParserPrintableName(_id) << ")"; 1805 while (Elem) { 1806 qCDebug(KHTML_LOG) << " > " << getParserPrintableName(Elem->id); 1807 Elem = Elem->next; 1808 } 1809 Elem = blockStack; 1810 #endif 1811 1812 while (Elem && (Elem->id != _id)) { 1813 if (maxLevel < Elem->level) { 1814 maxLevel = Elem->level; 1815 } 1816 Elem = Elem->next; 1817 } 1818 if (!Elem) { 1819 return; 1820 } 1821 1822 if (maxLevel > Elem->level) { 1823 // We didn't match because the tag is in a different scope, e.g., 1824 // <b><p>Foo</b>. Try to correct the problem. 1825 if (!isResidualStyleTag(_id)) { 1826 return; 1827 } 1828 return handleResidualStyleCloseTagAcrossBlocks(Elem); 1829 } 1830 1831 bool isAffectedByStyle = isAffectedByResidualStyle(Elem->id); 1832 HTMLStackElem *residualStyleStack = nullptr; 1833 NodeImpl *malformedTableParent = nullptr; 1834 1835 Elem = blockStack; 1836 1837 while (Elem) { 1838 if (Elem->id == _id) { 1839 int strayTable = inStrayTableContent; 1840 popOneBlock(); 1841 Elem = nullptr; 1842 1843 // This element was the root of some malformed content just inside an implicit or 1844 // explicit <tbody> or <tr>. 1845 // If we end up needing to reopen residual style tags, the root of the reopened chain 1846 // must also know that it is the root of malformed content inside a <tbody>/<tr>. 1847 if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) { 1848 NodeImpl *curr = current; 1849 while (curr && curr->id() != ID_TABLE) { 1850 curr = curr->parentNode(); 1851 } 1852 malformedTableParent = curr ? curr->parentNode() : nullptr; 1853 } 1854 } else { 1855 // Schedule this tag for reopening 1856 // after we complete the close of this entire block. 1857 NodeImpl *currNode = current; 1858 if (isAffectedByStyle && isResidualStyleTag(Elem->id)) { 1859 // We've overloaded the use of stack elements and are just reusing the 1860 // struct with a slightly different meaning to the variables. Instead of chaining 1861 // from innermost to outermost, we build up a list of all the tags we need to reopen 1862 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing 1863 // to the outermost tag we need to reopen. 1864 // We also set Elem->node to be the actual element that corresponds to the ID stored in 1865 // Elem->id rather than the node that you should pop to when the element gets pulled off 1866 // the stack. 1867 popOneBlock(false); 1868 Elem->next = residualStyleStack; 1869 Elem->setNode(currNode); 1870 residualStyleStack = Elem; 1871 } else { 1872 popOneBlock(); 1873 } 1874 Elem = blockStack; 1875 } 1876 } 1877 1878 reopenResidualStyleTags(residualStyleStack, malformedTableParent); 1879 } 1880 1881 void KHTMLParser::popOneBlock(bool delBlock) 1882 { 1883 HTMLStackElem *Elem = blockStack; 1884 1885 // we should never get here, but some bad html might cause it. 1886 #ifndef PARSER_DEBUG 1887 if (!Elem) { 1888 return; 1889 } 1890 #else 1891 qCDebug(KHTML_LOG) << "popping block: " << getParserPrintableName(Elem->id) << "(" << Elem->id << ")"; 1892 #endif 1893 1894 #if SPEED_DEBUG < 1 1895 if ((Elem->node != current)) { 1896 if (current->maintainsState() && document) { 1897 document->registerMaintainsState(current); 1898 document->attemptRestoreState(current); 1899 } 1900 current->close(); 1901 } 1902 #endif 1903 1904 removeForbidden(Elem->id, forbiddenTag); 1905 1906 blockStack = Elem->next; 1907 // we only set inline to false, if the element we close is a block level element. 1908 // This helps getting cases as <p><b>bla</b> <b>bla</b> right. 1909 1910 m_inline = Elem->m_inline; 1911 1912 if (current->id() == ID_FORM && form && inStrayTableContent) { 1913 form->setMalformed(true); 1914 } 1915 1916 setCurrent(Elem->node); 1917 1918 if (Elem->strayTableContent) { 1919 inStrayTableContent--; 1920 } 1921 1922 if (delBlock) { 1923 delete Elem; 1924 } 1925 } 1926 1927 void KHTMLParser::popInlineBlocks() 1928 { 1929 while (blockStack && current->isInline() && current->id() != ID_FONT) { 1930 popOneBlock(); 1931 } 1932 } 1933 1934 void KHTMLParser::freeBlock() 1935 { 1936 while (blockStack) { 1937 popOneBlock(); 1938 } 1939 blockStack = nullptr; 1940 } 1941 1942 void KHTMLParser::createHead() 1943 { 1944 if (head || !doc()->documentElement()) { 1945 return; 1946 } 1947 1948 head = new HTMLHeadElementImpl(document); 1949 HTMLElementImpl *body = doc()->body(); 1950 int exceptioncode = 0; 1951 doc()->documentElement()->insertBefore(head.get(), body, exceptioncode); 1952 if (exceptioncode) { 1953 #ifdef PARSER_DEBUG 1954 qCDebug(KHTML_LOG) << "creation of head failed!!!!:" << exceptioncode; 1955 #endif 1956 delete head.get(); 1957 head = nullptr; 1958 } 1959 1960 // If the body does not exist yet, then the <head> should be pushed as the current block. 1961 if (head && !body) { 1962 pushBlock(head->id(), tagPriority(head->id())); 1963 setCurrent(head.get()); 1964 } 1965 } 1966 1967 NodeImpl *KHTMLParser::handleIsindex(Token *t) 1968 { 1969 NodeImpl *n; 1970 HTMLFormElementImpl *myform = form; 1971 if (!myform) { 1972 myform = new HTMLFormElementImpl(document, true); 1973 n = myform; 1974 } else { 1975 n = new HTMLDivElementImpl(document, ID_DIV); 1976 } 1977 NodeImpl *child = new HTMLHRElementImpl(document); 1978 n->addChild(child); 1979 DOMStringImpl *a = t->attrs ? t->attrs->getValue(ATTR_PROMPT) : nullptr; 1980 DOMString text = i18n("This is a searchable index. Enter search keywords: "); 1981 if (a) { 1982 text = a; 1983 } 1984 child = new TextImpl(document, text.implementation()); 1985 n->addChild(child); 1986 child = new HTMLIsIndexElementImpl(document, myform); 1987 static_cast<ElementImpl *>(child)->setAttribute(ATTR_TYPE, "khtml_isindex"); 1988 n->addChild(child); 1989 child = new HTMLHRElementImpl(document); 1990 n->addChild(child); 1991 1992 return n; 1993 } 1994 1995 void KHTMLParser::startBody() 1996 { 1997 if (inBody) { 1998 return; 1999 } 2000 2001 inBody = true; 2002 2003 if (isindex) { 2004 insertNode(isindex, true /* don't decend into this node */); 2005 isindex = nullptr; 2006 } 2007 }