File indexing completed on 2024-05-05 16:10:16

0001 /*
0002     This file is part of the KDE libraries
0003 
0004     Copyright (C) 1997 Martin Jones (mjones@kde.org)
0005               (C) 1997 Torben Weis (weis@kde.org)
0006               (C) 1999,2001 Lars Knoll (knoll@kde.org)
0007               (C) 2000,2001 Dirk Mueller (mueller@kde.org)
0008               (C) 2003 Apple Computer, Inc.
0009 
0010     This library is free software; you can redistribute it and/or
0011     modify it under the terms of the GNU Library General Public
0012     License as published by the Free Software Foundation; either
0013     version 2 of the License, or (at your option) any later version.
0014 
0015     This library is distributed in the hope that it will be useful,
0016     but WITHOUT ANY WARRANTY; without even the implied warranty of
0017     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0018     Library General Public License for more details.
0019 
0020     You should have received a copy of the GNU Library General Public License
0021     along with this library; see the file COPYING.LIB.  If not, write to
0022     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0023     Boston, MA 02110-1301, USA.
0024 */
0025 //----------------------------------------------------------------------------
0026 //
0027 // KDE HTML Widget -- HTML Parser
0028 // #define PARSER_DEBUG
0029 
0030 #include "htmlparser.h"
0031 
0032 #include <dom/dom_exception.h>
0033 
0034 #include <html/html_baseimpl.h>
0035 #include <html/html_blockimpl.h>
0036 #include <html/html_canvasimpl.h>
0037 #include <html/html_documentimpl.h>
0038 #include <html/html_elementimpl.h>
0039 #include <html/html_formimpl.h>
0040 #include <html/html_headimpl.h>
0041 #include <html/html_imageimpl.h>
0042 #include <html/html_inlineimpl.h>
0043 #include <html/html_listimpl.h>
0044 #include <html/html_miscimpl.h>
0045 #include <html/html_tableimpl.h>
0046 #include <html/html_objectimpl.h>
0047 #include <html/HTMLAudioElement.h>
0048 #include <html/HTMLVideoElement.h>
0049 #include <html/HTMLSourceElement.h>
0050 #include <xml/dom_textimpl.h>
0051 #include <xml/dom_nodeimpl.h>
0052 #include <html/htmltokenizer.h>
0053 #include <khtmlview.h>
0054 #include <khtml_part.h>
0055 #include <khtml_global.h>
0056 #include <css/cssproperties.h>
0057 #include <css/cssvalues.h>
0058 
0059 #include <rendering/render_object.h>
0060 
0061 #include "khtml_debug.h"
0062 #include <klocalizedstring.h>
0063 
0064 // Turn off gnu90 inlining to avoid linker errors
0065 #undef __GNUC_STDC_INLINE__
0066 #undef __GNUC_GNU_INLINE__
0067 #include <doctypes.h>
0068 
0069 #undef OPTIONAL  // for win32, MinGW
0070 
0071 using namespace DOM;
0072 using namespace khtml;
0073 
0074 #ifdef PARSER_DEBUG
0075 static QString getParserPrintableName(int id)
0076 {
0077     if (id >= ID_CLOSE_TAG) {
0078         return "/" + getPrintableName(id - ID_CLOSE_TAG);
0079     } else {
0080         return getPrintableName(id);
0081     }
0082 }
0083 #endif
0084 
0085 //----------------------------------------------------------------------------
0086 
0087 /**
0088  * @internal
0089  */
0090 class HTMLStackElem
0091 {
0092 public:
0093     HTMLStackElem(int _id,
0094                   int _level,
0095                   DOM::NodeImpl *_node,
0096                   bool _inline_,
0097                   HTMLStackElem *_next)
0098         :
0099         id(_id),
0100         level(_level),
0101         strayTableContent(false),
0102         m_inline(_inline_),
0103         node(_node),
0104         next(_next)
0105     {
0106         node->ref();
0107     }
0108 
0109     ~HTMLStackElem()
0110     {
0111         node->deref();
0112     }
0113 
0114     void setNode(NodeImpl *newNode)
0115     {
0116         newNode->ref();
0117         node->deref();
0118         node = newNode;
0119     }
0120 
0121     int       id;
0122     int       level;
0123     bool      strayTableContent;
0124     bool m_inline;
0125     NodeImpl *node;
0126     HTMLStackElem *next;
0127 };
0128 
/**
 * @internal
 *
 * The parser parses tokenized input into the document, building up the
 * document tree. If the document is well-formed, parsing it is
 * straightforward.
 * Unfortunately, people can't write well-formed HTML documents, so the parser
 * has to be tolerant of errors.
 *
 * We have to take care of the following error conditions:
 * 1. The element being added is explicitly forbidden inside some outer tag.
 *    In this case we should close all tags up to the one which forbids
 *    the element, and add it afterwards.
 * 2. We are not allowed to add the element directly. It could be that
 *    the person writing the document forgot some tag in between (or that the
 *    tag in between is optional...). This could be the case with the following
 *    tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?)
 * 3. We want to add a block element inside an inline element. Close all
 *    inline elements up to the next higher block element.
 * 4. If this doesn't help, close elements until we are allowed to add the
 *    element, or ignore the tag.
 *
 */
0152 
0153 KHTMLParser::KHTMLParser(KHTMLView *_parent, DocumentImpl *doc)
0154 {
0155     //qCDebug(KHTML_LOG) << "parser constructor";
0156 #if SPEED_DEBUG > 0
0157     qt.start();
0158 #endif
0159 
0160     HTMLWidget    = _parent;
0161     document      = doc;
0162 
0163     blockStack = nullptr;
0164     current = nullptr;
0165 
0166     // ID_CLOSE_TAG == Num of tags
0167     forbiddenTag = new ushort[ID_CLOSE_TAG + 1];
0168 
0169     reset();
0170 }
0171 
0172 KHTMLParser::KHTMLParser(DOM::DocumentFragmentImpl *i, DocumentImpl *doc)
0173 {
0174     HTMLWidget = nullptr;
0175     document = doc;
0176 
0177     forbiddenTag = new ushort[ID_CLOSE_TAG + 1];
0178 
0179     blockStack = nullptr;
0180     current = nullptr;
0181 
0182     reset();
0183 
0184     setCurrent(i);
0185 
0186     inBody = true;
0187 }
0188 
0189 KHTMLParser::~KHTMLParser()
0190 {
0191 #if SPEED_DEBUG > 0
0192     qCDebug(KHTML_LOG) << "TIME: parsing time was = " << qt.elapsed();
0193 #endif
0194 
0195     freeBlock();
0196 
0197     if (current) {
0198         current->deref();
0199     }
0200 
0201     delete [] forbiddenTag;
0202     delete isindex;
0203 }
0204 
0205 void KHTMLParser::reset()
0206 {
0207     setCurrent(document);
0208 
0209     freeBlock();
0210 
0211     // before parsing no tags are forbidden...
0212     memset(forbiddenTag, 0, (ID_CLOSE_TAG + 1)*sizeof(ushort));
0213 
0214     inBody = false;
0215     haveFrameSet = false;
0216     haveContent = false;
0217     haveBody = false;
0218     haveTitle = false;
0219     inSelect = false;
0220     inStrayTableContent = 0;
0221     m_inline = false;
0222 
0223     form = nullptr;
0224     map = nullptr;
0225     end = false;
0226     isindex = nullptr;
0227 
0228     discard_until = 0;
0229 }
0230 
0231 void KHTMLParser::parseToken(Token *t)
0232 {
0233     if (t->tid > 2 * ID_CLOSE_TAG) {
0234         // qCDebug(KHTML_LOG) << "Unknown tag!! tagID = " << t->tid;
0235         return;
0236     }
0237     if (discard_until) {
0238         if (t->tid == discard_until) {
0239             discard_until = 0;
0240         }
0241 
0242         // do not skip </iframe>
0243         if (discard_until || current->id() + ID_CLOSE_TAG != t->tid) {
0244             return;
0245         }
0246     }
0247 
0248 #ifdef PARSER_DEBUG
0249     qCDebug(KHTML_LOG) << "\n\n==> parser: processing token " << getParserPrintableName(t->tid) << "(" << t->tid << ")"
0250              << " current = " << getParserPrintableName(current->id()) << "(" << current->id() << ")";
0251     qCDebug(KHTML_LOG) << "inline=" << m_inline << " inBody=" << inBody << " haveFrameSet=" << haveFrameSet << " haveContent=" << haveContent;
0252 #endif
0253 
0254     // holy shit. apparently some sites use </br> instead of <br>
0255     // be compatible with IE and NS
0256     if (t->tid == ID_BR + ID_CLOSE_TAG && document->inCompatMode()) {
0257         t->tid -= ID_CLOSE_TAG;
0258     }
0259 
0260     if (t->tid > ID_CLOSE_TAG) {
0261         processCloseTag(t);
0262         return;
0263     }
0264 
0265     // ignore spaces, if we're not inside a paragraph or other inline code
0266     if (t->tid == ID_TEXT && t->text) {
0267         if (inBody && !skipMode() &&
0268                 current->id() != ID_STYLE && current->id() != ID_TITLE &&
0269                 current->id() != ID_SCRIPT &&
0270                 !t->text->containsOnlyWhitespace()) {
0271             haveContent = true;
0272         }
0273 #ifdef PARSER_DEBUG
0274 
0275         qCDebug(KHTML_LOG) << "length=" << t->text->l << " text='" << QString::fromRawData(t->text->s, t->text->l) << "'";
0276 #endif
0277     }
0278 
0279     NodeImpl *n = getElement(t);
0280     // just to be sure, and to catch currently unimplemented stuff
0281     if (!n) {
0282         return;
0283     }
0284 
0285     // set attributes
0286     if (n->isElementNode() && t->tid != ID_ISINDEX) {
0287         ElementImpl *e = static_cast<ElementImpl *>(n);
0288         e->setAttributeMap(t->attrs);
0289     }
0290 
0291     // if this tag is forbidden inside the current context, pop
0292     // blocks until we are allowed to add it...
0293     while (blockStack && forbiddenTag[t->tid]) {
0294 #ifdef PARSER_DEBUG
0295         qCDebug(KHTML_LOG) << "t->id: " << t->tid << " is forbidden :-( ";
0296 #endif
0297         popOneBlock();
0298     }
0299 
0300     // sometimes flat doesn't make sense
0301     switch (t->tid) {
0302     case ID_SELECT:
0303     case ID_OPTION:
0304         t->flat = false;
0305     }
0306 
0307     // the tokenizer needs the feedback for space discarding
0308     if (tagPriority(t->tid) == 0) {
0309         t->flat = true;
0310     }
0311 
0312     if (!insertNode(n, t->flat)) {
0313         // we couldn't insert the node...
0314 #ifdef PARSER_DEBUG
0315         qCDebug(KHTML_LOG) << "insertNode failed current=" << current->id() << ", new=" << n->id() << "!";
0316 #endif
0317         if (map == n) {
0318 #ifdef PARSER_DEBUG
0319             qCDebug(KHTML_LOG) << "  --> resetting map!";
0320 #endif
0321             map = nullptr;
0322         }
0323         if (form == n) {
0324 #ifdef PARSER_DEBUG
0325             qCDebug(KHTML_LOG) << "   --> resetting form!";
0326 #endif
0327             form = nullptr;
0328         }
0329         delete n;
0330     }
0331 }
0332 
0333 void KHTMLParser::parseDoctypeToken(DoctypeToken *t)
0334 {
0335     // Ignore any doctype after the first. TODO It should be also ignored when processing DocumentFragment
0336     if (current != document || document->doctype()) {
0337         return;
0338     }
0339 
0340     DocumentTypeImpl *doctype = new DocumentTypeImpl(document->implementation(), document, t->name, t->publicID, t->systemID);
0341     if (!t->internalSubset.isEmpty()) {
0342         doctype->setInternalSubset(t->internalSubset);
0343     }
0344     document->addChild(doctype);
0345 
0346     // Determine parse mode here
0347     // This code more or less mimics Mozilla's implementation.
0348     //
0349     // There are three possible parse modes:
0350     // COMPAT - quirks mode emulates WinIE
0351     // and NS4.  CSS parsing is also relaxed in this mode, e.g., unit types can
0352     // be omitted from numbers.
0353     // ALMOST STRICT - This mode is identical to strict mode
0354     // except for its treatment of line-height in the inline box model.  For
0355     // now (until the inline box model is re-written), this mode is identical
0356     // to STANDARDS mode.
0357     // STRICT - no quirks apply.  Web pages will obey the specifications to
0358     // the letter.
0359 
0360     if (!document->isHTMLDocument()) { // FIXME Could document be non-HTML?
0361         return;
0362     }
0363     DOM::HTMLDocumentImpl *htmldoc = static_cast<DOM::HTMLDocumentImpl *>(document);
0364     if (t->name.toLower() == "html") {
0365         if (!t->internalSubset.isEmpty() || t->publicID.isEmpty()) {
0366             // Internal subsets always denote full standards, as does
0367             // a doctype without a public ID.
0368             htmldoc->changeModes(DOM::DocumentImpl::Strict, DOM::DocumentImpl::Html4);
0369         } else {
0370             // We have to check a list of public IDs to see what we
0371             // should do.
0372             QString lowerPubID = t->publicID.toLower();
0373             QByteArray pubIDStr = lowerPubID.toLocal8Bit();
0374 
0375             // Look up the entry in our gperf-generated table.
0376             const PubIDInfo *doctypeEntry = Perfect_Hash::findDoctypeEntry(pubIDStr.constData(), t->publicID.length());
0377             if (!doctypeEntry) {
0378                 // The DOCTYPE is not in the list.  Assume strict mode.
0379                 // ### Doesn't make any sense, but it's what Mozilla does.
0380                 htmldoc->changeModes(DOM::DocumentImpl::Strict, DOM::DocumentImpl::Html4);
0381             } else {
0382                 switch ((!t->systemID.isEmpty()) ?
0383                         doctypeEntry->mode_if_sysid :
0384                         doctypeEntry->mode_if_no_sysid) {
0385                 case PubIDInfo::eQuirks3:
0386                     htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html3);
0387                     break;
0388                 case PubIDInfo::eQuirks:
0389                     htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html4);
0390                     break;
0391                 case PubIDInfo::eAlmostStandards:
0392                     htmldoc->changeModes(DOM::DocumentImpl::Transitional, DOM::DocumentImpl::Html4);
0393                     break;
0394                 default:
0395                     assert(!"Unknown parse mode");
0396                 }
0397             }
0398         }
0399     } else {
0400         // Malformed doctype implies quirks mode.
0401         htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html3);
0402     }
0403 }
0404 
0405 static bool isTableRelatedTag(int id)
0406 {
0407     return (id == ID_TR || id == ID_TD || id == ID_TABLE || id == ID_TBODY || id == ID_TFOOT || id == ID_THEAD ||
0408             id == ID_TH);
0409 }
0410 
0411 bool KHTMLParser::insertNode(NodeImpl *n, bool flat)
0412 {
0413     int id = n->id();
0414 
0415     // <table> is never allowed inside stray table content.  Always pop out of the stray table content
0416     // and close up the first table, and then start the second table as a sibling.
0417     if (inStrayTableContent && id == ID_TABLE) {
0418         popBlock(ID_TABLE);
0419     }
0420 
0421     // let's be stupid and just try to insert it.
0422     // this should work if the document is wellformed
0423 #ifdef PARSER_DEBUG
0424     NodeImpl *tmp = current;
0425 #endif
0426     NodeImpl *newNode = current->addChild(n);
0427     if (newNode) {
0428 #ifdef PARSER_DEBUG
0429         qCDebug(KHTML_LOG) << "added " << n->nodeName().string() << " to " << tmp->nodeName().string() << ", new current=" << newNode->nodeName().string();
0430 #endif
0431         // We allow TABLE > FORM in dtd.cpp, but do not allow the form have children in this case
0432         if (current->id() == ID_TABLE && id == ID_FORM) {
0433             flat = true;
0434             static_cast<HTMLFormElementImpl *>(n)->setMalformed(true);
0435         }
0436 
0437         // don't push elements without end tag on the stack
0438         if (tagPriority(id) != 0 && !flat) {
0439 #if SPEED_DEBUG < 2
0440             if (!n->attached() && HTMLWidget) {
0441                 n->attach();
0442             }
0443 #endif
0444             if (n->isInline()) {
0445                 m_inline = true;
0446             }
0447             pushBlock(id, tagPriority(id));
0448             setCurrent(newNode);
0449         } else {
0450 #if SPEED_DEBUG < 2
0451             if (!n->attached() && HTMLWidget) {
0452                 n->attach();
0453             }
0454             if (n->maintainsState()) {
0455                 document->registerMaintainsState(n);
0456                 document->attemptRestoreState(n);
0457             }
0458             n->close();
0459 #endif
0460             if (n->isInline()) {
0461                 m_inline = true;
0462             }
0463         }
0464 
0465 #if SPEED_DEBUG < 1
0466         if (tagPriority(id) == 0 && n->renderer()) {
0467             n->renderer()->calcMinMaxWidth();
0468         }
0469 #endif
0470         return true;
0471     } else {
0472 #ifdef PARSER_DEBUG
0473         qCDebug(KHTML_LOG) << "ADDING NODE FAILED!!!! current = " << current->nodeName().string() << ", new = " << n->nodeName().string();
0474 #endif
0475         // error handling...
0476         HTMLElementImpl *e;
0477         bool handled = false;
0478 
0479         // first switch on current element for elements with optional end-tag and inline-only content
0480         switch (current->id()) {
0481         case ID_P:
0482         case ID_DT:
0483             if (!n->isInline()) {
0484                 popBlock(current->id());
0485                 return insertNode(n);
0486             }
0487             break;
0488         case ID_TITLE:
0489             popBlock(current->id());
0490             return insertNode(n);
0491         default:
0492             break;
0493         }
0494 
0495         // switch according to the element to insert
0496         switch (id) {
0497         case ID_TR:
0498         case ID_TH:
0499         case ID_TD:
0500             if (inStrayTableContent && !isTableRelatedTag(current->id())) {
0501                 // pop out to the nearest enclosing table-related tag.
0502                 while (blockStack && !isTableRelatedTag(current->id())) {
0503                     popOneBlock();
0504                 }
0505                 return insertNode(n);
0506             }
0507             break;
0508         case ID_HEAD:
0509             // ### allow not having <HTML> in at all, as per HTML spec
0510             if (!current->isDocumentNode() && current->id() != ID_HTML) {
0511                 return false;
0512             }
0513             break;
0514         case ID_COMMENT:
0515             if (head) {
0516                 break;
0517             }
0518         case ID_META:
0519         case ID_LINK:
0520         case ID_ISINDEX:
0521         case ID_BASE:
0522             if (!head) {
0523                 createHead();
0524             }
0525             if (head) {
0526                 if (head->addChild(n)) {
0527 #if SPEED_DEBUG < 2
0528                     if (!n->attached() && HTMLWidget) {
0529                         n->attach();
0530                     }
0531 #endif
0532                 }
0533 
0534                 return true;
0535             }
0536 
0537             break;
0538         case ID_HTML:
0539             if (!current->isDocumentNode()) {
0540                 if (doc()->documentElement()->id() == ID_HTML) {
0541                     // we have another <HTML> element.... apply attributes to existing one
0542                     // make sure we don't overwrite already existing attributes
0543                     NamedAttrMapImpl *map = static_cast<ElementImpl *>(n)->attributes(true);
0544                     NamedAttrMapImpl *bmap = static_cast<ElementImpl *>(doc()->documentElement())->attributes(false);
0545                     bool changed = false;
0546                     for (unsigned long l = 0; map && l < map->length(); ++l) {
0547                         NodeImpl::Id attrId = map->idAt(l);
0548                         DOMStringImpl *attrValue = map->valueAt(l);
0549                         changed = !bmap->getValue(attrId);
0550                         bmap->setValue(attrId, attrValue);
0551                     }
0552                     if (changed) {
0553                         doc()->recalcStyle(NodeImpl::Inherit);
0554                     }
0555                 }
0556                 return false;
0557             }
0558             break;
0559         case ID_TITLE:
0560         case ID_STYLE:
0561             if (!head) {
0562                 createHead();
0563             }
0564             if (head) {
0565                 DOM::NodeImpl *newNode = head->addChild(n);
0566                 if (newNode) {
0567                     pushBlock(id, tagPriority(id));
0568                     setCurrent(newNode);
0569 #if SPEED_DEBUG < 2
0570                     if (!n->attached() && HTMLWidget) {
0571                         n->attach();
0572                     }
0573 #endif
0574                 } else {
0575 #ifdef PARSER_DEBUG
0576                     qCDebug(KHTML_LOG) << "adding style before to body failed!!!!";
0577 #endif
0578                     discard_until = ID_STYLE + ID_CLOSE_TAG;
0579                     return false;
0580                 }
0581                 return true;
0582             } else if (inBody) {
0583                 discard_until = id + ID_CLOSE_TAG;
0584                 return false;
0585             }
0586             break;
0587         case ID_SCRIPT:
0588             // if we failed to insert it, go into skip mode
0589             discard_until = id + ID_CLOSE_TAG;
0590             break;
0591         case ID_BODY:
0592             if (inBody && doc()->body()) {
0593                 // we have another <BODY> element.... apply attributes to existing one
0594                 // make sure we don't overwrite already existing attributes
0595                 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
0596                 NamedAttrMapImpl *map = static_cast<ElementImpl *>(n)->attributes(true);
0597                 NamedAttrMapImpl *bmap = doc()->body()->attributes(false);
0598                 bool changed = false;
0599                 for (unsigned long l = 0; map && l < map->length(); ++l) {
0600                     NodeImpl::Id attrId = map->idAt(l);
0601                     DOMStringImpl *attrValue = map->valueAt(l);
0602                     if (!bmap->getValue(attrId)) {
0603                         bmap->setValue(attrId, attrValue);
0604                         changed = true;
0605                     }
0606                 }
0607                 if (changed) {
0608                     doc()->recalcStyle(NodeImpl::Inherit);
0609                 }
0610             } else if (current->isDocumentNode()) {
0611                 break;
0612             }
0613             return false;
0614             break;
0615 
0616         // the following is a hack to move non rendered elements
0617         // outside of tables.
0618         // needed for broken constructs like <table><form ...><tr>....
0619         case ID_INPUT: {
0620             ElementImpl *e = static_cast<ElementImpl *>(n);
0621             DOMString type = e->getAttribute(ATTR_TYPE);
0622 
0623             if (strcasecmp(type, "hidden") != 0) {
0624                 break;
0625             }
0626             // Fall through!
0627         }
0628         case ID_TEXT: {
0629             // Don't try to fit random white-space anywhere
0630             TextImpl *t = static_cast<TextImpl *>(n);
0631             if (t->containsOnlyWhitespace()) {
0632                 return false;
0633             }
0634             // ignore text inside the following elements.
0635             switch (current->id()) {
0636             case ID_SELECT:
0637                 return false;
0638             default:
0639                 ;
0640                 // fall through!!
0641             };
0642             break;
0643         }
0644         case ID_DL:
0645             popBlock(ID_DT);
0646             if (current->id() == ID_DL) {
0647                 e = new HTMLGenericElementImpl(document, ID_DD);
0648                 insertNode(e);
0649                 handled = true;
0650             }
0651             break;
0652         case ID_DT:
0653             e = new HTMLDListElementImpl(document);
0654             if (insertNode(e)) {
0655                 insertNode(n);
0656                 return true;
0657             }
0658             break;
0659         case ID_AREA: {
0660             if (map) {
0661                 map->addChild(n);
0662 #if SPEED_DEBUG < 2
0663                 if (!n->attached() && HTMLWidget) {
0664                     n->attach();
0665                 }
0666 #endif
0667                 handled = true;
0668                 return true;
0669             } else {
0670                 return false;
0671             }
0672         }
0673 
0674         case ID_THEAD:
0675         case ID_TBODY:
0676         case ID_TFOOT:
0677         case ID_CAPTION:
0678         case ID_COLGROUP: {
0679             if (isTableRelatedTag(current->id())) {
0680                 while (blockStack && current->id() != ID_TABLE && isTableRelatedTag(current->id())) {
0681                     popOneBlock();
0682                 }
0683                 return insertNode(n);
0684             }
0685         }
0686         default:
0687             break;
0688         }
0689 
0690         // switch on the currently active element
0691         switch (current->id()) {
0692         case ID_HTML:
0693             switch (id) {
0694             case ID_SCRIPT:
0695             case ID_STYLE:
0696             case ID_META:
0697             case ID_LINK:
0698             case ID_OBJECT:
0699             case ID_EMBED:
0700             case ID_TITLE:
0701             case ID_ISINDEX:
0702             case ID_BASE:
0703                 if (!head) {
0704                     head = new HTMLHeadElementImpl(document);
0705                     insertNode(head.get());
0706                     handled = true;
0707                 }
0708                 break;
0709             case ID_TEXT: {
0710                 TextImpl *t = static_cast<TextImpl *>(n);
0711                 if (t->containsOnlyWhitespace()) {
0712                     return false;
0713                 }
0714                 /* Fall through to default */
0715             }
0716             default:
0717                 if (haveFrameSet) {
0718                     break;
0719                 }
0720                 e = new HTMLBodyElementImpl(document);
0721                 startBody();
0722                 insertNode(e);
0723                 handled = true;
0724                 break;
0725             }
0726             break;
0727         case ID_HEAD:
0728             // we can get here only if the element is not allowed in head.
0729             if (id == ID_HTML) {
0730                 return false;
0731             } else {
0732                 // This means the body starts here...
0733                 if (haveFrameSet) {
0734                     break;
0735                 }
0736                 popBlock(ID_HEAD);
0737                 e = new HTMLBodyElementImpl(document);
0738                 startBody();
0739                 insertNode(e);
0740                 handled = true;
0741             }
0742             break;
0743         case ID_BODY:
0744             break;
0745         case ID_CAPTION:
0746             // Illegal content in a caption. Close the caption and try again.
0747             popBlock(ID_CAPTION);
0748             switch (id) {
0749             case ID_THEAD:
0750             case ID_TFOOT:
0751             case ID_TBODY:
0752             case ID_TR:
0753             case ID_TD:
0754             case ID_TH:
0755                 return insertNode(n, flat);
0756             }
0757             break;
0758         case ID_TABLE:
0759         case ID_THEAD:
0760         case ID_TFOOT:
0761         case ID_TBODY:
0762         case ID_TR:
0763             switch (id) {
0764             case ID_TABLE:
0765                 popBlock(ID_TABLE); // end the table
0766                 handled = checkChild(current->id(), id, doc()->inStrictMode());
0767                 break;
0768             default: {
0769                 NodeImpl *node = current;
0770                 NodeImpl *parent = node->parentNode();
0771                 // A script may have removed the current node's parent from the DOM
0772                 // http://bugzilla.opendarwin.org/show_bug.cgi?id=7137
0773                 // FIXME: we should do real recovery here and re-parent with the correct node.
0774                 if (!parent) {
0775                     return false;
0776                 }
0777                 NodeImpl *parentparent = parent->parentNode();
0778 
0779                 if (n->isTextNode() ||
0780                         (node->id() == ID_TR &&
0781                          (parent->id() == ID_THEAD ||
0782                           parent->id() == ID_TBODY ||
0783                           parent->id() == ID_TFOOT) && parentparent->id() == ID_TABLE) ||
0784                         (!checkChild(ID_TR, id) && (node->id() == ID_THEAD || node->id() == ID_TBODY || node->id() == ID_TFOOT) &&
0785                          parent->id() == ID_TABLE)) {
0786                     node = (node->id() == ID_TABLE) ? node :
0787                            ((node->id() == ID_TR) ? parentparent : parent);
0788                     NodeImpl *parent = node->parentNode();
0789                     if (!parent) {
0790                         return false;
0791                     }
0792                     int exceptioncode = 0;
0793 #ifdef PARSER_DEBUG
0794                     qCDebug(KHTML_LOG) << "calling insertBefore(" << n->nodeName().string() << "," << node->nodeName().string() << ")";
0795 #endif
0796                     parent->insertBefore(n, node, exceptioncode);
0797                     if (exceptioncode) {
0798 #ifndef PARSER_DEBUG
0799                         if (!n->isTextNode())
0800 #endif
0801                             // qCDebug(KHTML_LOG) << "adding content before table failed..";
0802                             break;
0803                     }
0804                     if (n->isElementNode() && tagPriority(id) != 0 &&
0805                             !flat && endTagRequirement(id) != DOM::FORBIDDEN) {
0806 
0807                         pushBlock(id, tagPriority(id));
0808                         setCurrent(n);
0809                         inStrayTableContent++;
0810                         blockStack->strayTableContent = true;
0811                     }
0812                     return true;
0813                 }
0814 
0815                 if (current->id() == ID_TR) {
0816                     e = new HTMLTableCellElementImpl(document, ID_TD);
0817                 } else if (current->id() == ID_TABLE) {
0818                     e = new HTMLTableSectionElementImpl(document, ID_TBODY, true /* implicit */);
0819                 } else {
0820                     e = new HTMLTableRowElementImpl(document);
0821                 }
0822 
0823                 insertNode(e);
0824                 handled = true;
0825                 break;
0826             } // end default
0827             } // end switch
0828             break;
0829         case ID_OBJECT:
0830             discard_until = id + ID_CLOSE_TAG;
0831             return false;
0832         case ID_UL:
0833         case ID_OL:
0834         case ID_DIR:
0835         case ID_MENU:
0836             e = new HTMLLIElementImpl(document);
0837             e->addCSSProperty(CSS_PROP_LIST_STYLE_TYPE, CSS_VAL_NONE);
0838             insertNode(e);
0839             handled = true;
0840             break;
0841         case ID_FORM:
0842             popBlock(ID_FORM);
0843             handled = true;
0844             break;
0845         case ID_SELECT:
0846             if (n->isInline()) {
0847                 return false;
0848             }
0849             break;
0850         case ID_P:
0851         case ID_H1:
0852         case ID_H2:
0853         case ID_H3:
0854         case ID_H4:
0855         case ID_H5:
0856         case ID_H6:
0857             if (!n->isInline()) {
0858                 popBlock(current->id());
0859                 handled = true;
0860             }
0861             break;
0862         case ID_OPTION:
0863         case ID_OPTGROUP:
0864             if (id == ID_OPTGROUP) {
0865                 popBlock(current->id());
0866                 handled = true;
0867             } else if (id == ID_SELECT) {
0868                 // IE treats a nested select as </select>. Let's do the same
0869                 popBlock(ID_SELECT);
0870                 break;
0871             }
0872             break;
0873         // head elements in the body should be ignored.
0874 
0875         case ID_ADDRESS:
0876         case ID_COLGROUP:
0877         case ID_FONT:
0878             popBlock(current->id());
0879             handled = true;
0880             break;
0881         default:
0882             if (current->isDocumentNode()) {
0883                 DocumentImpl *doc = static_cast<DocumentImpl *>(current);
0884                 if (!doc->documentElement()) {
0885                     e = new HTMLHtmlElementImpl(document);
0886                     insertNode(e);
0887                     handled = true;
0888                 }
0889             } else if (current->isInline()) {
0890                 popInlineBlocks();
0891                 handled = true;
0892             }
0893         }
0894 
0895         // if we couldn't handle the error, just rethrow the exception...
0896         if (!handled) {
0897             //qCDebug(KHTML_LOG) << "Exception handler failed in HTMLPArser::insertNode()";
0898             return false;
0899         }
0900 
0901         return insertNode(n);
0902     }
0903 }
0904 
0905 NodeImpl *KHTMLParser::getElement(Token *t)
0906 {
0907     NodeImpl *n = nullptr;
0908 
0909     switch (t->tid) {
0910     case ID_HTML:
0911         n = new HTMLHtmlElementImpl(document);
0912         break;
0913     case ID_HEAD:
0914         if (!head && (current->id() == ID_HTML || current->isDocumentNode())) {
0915             head = new HTMLHeadElementImpl(document);
0916             n = head.get();
0917         }
0918         break;
0919     case ID_BODY:
0920         // body no longer allowed if we have a frameset
0921         if (haveFrameSet) {
0922             break;
0923         }
0924         popBlock(ID_HEAD);
0925         n = new HTMLBodyElementImpl(document);
0926         haveBody =  true;
0927         startBody();
0928         break;
0929 
0930 // head elements
0931     case ID_BASE:
0932         n = new HTMLBaseElementImpl(document);
0933         break;
0934     case ID_LINK:
0935         n = new HTMLLinkElementImpl(document);
0936         break;
0937     case ID_META:
0938         n = new HTMLMetaElementImpl(document);
0939         break;
0940     case ID_STYLE:
0941         n = new HTMLStyleElementImpl(document);
0942         break;
0943     case ID_TITLE:
0944         // only one non-empty <title> allowed
0945         if (haveTitle) {
0946             discard_until = ID_TITLE + ID_CLOSE_TAG;
0947             break;
0948         }
0949         n = new HTMLTitleElementImpl(document);
0950         // we'll set haveTitle when closing the tag
0951         break;
0952 
0953 // frames
0954     case ID_FRAME:
0955         n = new HTMLFrameElementImpl(document);
0956         break;
0957     case ID_FRAMESET:
0958         popBlock(ID_HEAD);
0959         if (inBody && !haveFrameSet && !haveContent && !haveBody) {
0960             popBlock(ID_BODY);
0961             // ### actually for IE document.body returns the now hidden "body" element
0962             // we can't implement that behavior now because it could cause too many
0963             // regressions and the headaches are not worth the work as long as there is
0964             // no site actually relying on that detail (Dirk)
0965             if (static_cast<HTMLDocumentImpl *>(document)->body())
0966                 static_cast<HTMLDocumentImpl *>(document)->body()
0967                 ->addCSSProperty(CSS_PROP_DISPLAY, CSS_VAL_NONE);
0968             inBody = false;
0969         }
0970         if ((haveBody || haveContent || haveFrameSet) && current->id() == ID_HTML) {
0971             break;
0972         }
0973         n = new HTMLFrameSetElementImpl(document);
0974         haveFrameSet = true;
0975         startBody();
0976         break;
0977     // a bit a special case, since the frame is inlined...
0978     case ID_IFRAME:
0979         n = new HTMLIFrameElementImpl(document);
0980         break;
0981 
0982 // form elements
0983     case ID_FORM:
0984         // thou shall not nest <form> - NS/IE quirk
0985         if (form) {
0986             break;
0987         }
0988         n = form = new HTMLFormElementImpl(document, false);
0989         break;
0990     case ID_BUTTON:
0991         n = new HTMLButtonElementImpl(document, form);
0992         break;
0993     case ID_FIELDSET:
0994         n = new HTMLFieldSetElementImpl(document, form);
0995         break;
0996     case ID_INPUT:
0997         if (t->attrs &&
0998                 KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled() &&
0999                 KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled() &&
1000                 !strcasecmp(t->attrs->getValue(ATTR_TYPE), "image")) {
1001             const QString url = doc()->completeURL(DOMString(t->attrs->getValue(ATTR_SRC)).trimSpaces().string());
1002             if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered(url)) {
1003                 return nullptr;
1004             }
1005         }
1006         n = new HTMLInputElementImpl(document, form);
1007         break;
1008     case ID_ISINDEX:
1009         n = handleIsindex(t);
1010         if (!inBody) {
1011             isindex = n;
1012             n = nullptr;
1013         } else {
1014             t->flat = true;
1015         }
1016         break;
1017     case ID_KEYGEN:
1018         n = new HTMLKeygenElementImpl(document, form);
1019         break;
1020     case ID_LABEL:
1021         n = new HTMLLabelElementImpl(document);
1022         break;
1023     case ID_LEGEND:
1024         n = new HTMLLegendElementImpl(document, form);
1025         break;
1026     case ID_OPTGROUP:
1027         n = new HTMLOptGroupElementImpl(document, form);
1028         break;
1029     case ID_OPTION:
1030         popOptionalBlock(ID_OPTION);
1031         n = new HTMLOptionElementImpl(document, form);
1032         break;
1033     case ID_SELECT:
1034         inSelect = true;
1035         n = new HTMLSelectElementImpl(document, form);
1036         break;
1037     case ID_TEXTAREA:
1038         n = new HTMLTextAreaElementImpl(document, form);
1039         break;
1040 
1041 // lists
1042     case ID_DL:
1043         n = new HTMLDListElementImpl(document);
1044         break;
1045     case ID_DD:
1046         popOptionalBlock(ID_DT);
1047         popOptionalBlock(ID_DD);
1048         n = new HTMLGenericElementImpl(document, t->tid);
1049         break;
1050     case ID_DT:
1051         popOptionalBlock(ID_DD);
1052         popOptionalBlock(ID_DT);
1053         n = new HTMLGenericElementImpl(document, t->tid);
1054         break;
1055     case ID_UL: {
1056         n = new HTMLUListElementImpl(document);
1057         break;
1058     }
1059     case ID_OL: {
1060         n = new HTMLOListElementImpl(document);
1061         break;
1062     }
1063     case ID_DIR:
1064         n = new HTMLDirectoryElementImpl(document);
1065         break;
1066     case ID_MENU:
1067         n = new HTMLMenuElementImpl(document);
1068         break;
1069     case ID_LI:
1070         popOptionalBlock(ID_LI);
1071         n = new HTMLLIElementImpl(document);
1072         break;
1073 // formatting elements (block)
1074     case ID_BLOCKQUOTE:
1075         n = new HTMLGenericElementImpl(document, t->tid);
1076         break;
1077     case ID_LAYER:
1078     case ID_ILAYER:
1079         n = new HTMLLayerElementImpl(document, t->tid);
1080         break;
1081     case ID_P:
1082     case ID_DIV:
1083         n = new HTMLDivElementImpl(document, t->tid);
1084         break;
1085     case ID_H1:
1086     case ID_H2:
1087     case ID_H3:
1088     case ID_H4:
1089     case ID_H5:
1090     case ID_H6:
1091         n = new HTMLGenericElementImpl(document, t->tid);
1092         break;
1093     case ID_HR:
1094         n = new HTMLHRElementImpl(document);
1095         break;
1096     case ID_PRE:
1097     case ID_XMP:
1098     case ID_PLAINTEXT:
1099     case ID_LISTING:
1100         n = new HTMLPreElementImpl(document, t->tid);
1101         break;
1102 
1103 // font stuff
1104     case ID_BASEFONT:
1105         n = new HTMLBaseFontElementImpl(document);
1106         break;
1107     case ID_FONT:
1108         n = new HTMLFontElementImpl(document);
1109         break;
1110 
1111 // ins/del
1112     case ID_DEL:
1113     case ID_INS:
1114         n = new HTMLGenericElementImpl(document, t->tid);
1115         break;
1116 
1117 // anchor
1118     case ID_A:
1119         popBlock(ID_A);
1120 
1121         n = new HTMLAnchorElementImpl(document);
1122         break;
1123 
1124 // images
1125     case ID_IMAGE:
1126     case ID_IMG:
1127         if (t->attrs &&
1128                 KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled() &&
1129                 KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled()) {
1130             const QString url = doc()->completeURL(DOMString(t->attrs->getValue(ATTR_SRC)).trimSpaces().string());
1131             if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered(url)) {
1132                 return nullptr;
1133             }
1134         }
1135         n = new HTMLImageElementImpl(document, form);
1136         break;
1137 
1138     case ID_CANVAS:
1139         n = new HTMLCanvasElementImpl(document);
1140         break;
1141 
1142     case ID_MAP:
1143         map = new HTMLMapElementImpl(document);
1144         n = map;
1145         break;
1146     case ID_AREA:
1147         n = new HTMLAreaElementImpl(document);
1148         break;
1149 
1150 // objects, applets and scripts
1151     case ID_APPLET:
1152         n = new HTMLAppletElementImpl(document);
1153         break;
1154     case ID_EMBED:
1155         n = new HTMLEmbedElementImpl(document);
1156         break;
1157     case ID_OBJECT:
1158         n = new HTMLObjectElementImpl(document);
1159         break;
1160     case ID_PARAM:
1161         n = new HTMLParamElementImpl(document);
1162         break;
1163     case ID_SCRIPT: {
1164         HTMLScriptElementImpl *scriptElement = new HTMLScriptElementImpl(document);
1165         scriptElement->setCreatedByParser(true);
1166         n = scriptElement;
1167         break;
1168     }
1169 
1170 // media
1171     case ID_AUDIO:
1172         n = new HTMLAudioElement(document);
1173         break;
1174     case ID_VIDEO:
1175         n = new HTMLVideoElement(document);
1176         break;
1177     case ID_SOURCE:
1178         n = new HTMLSourceElement(document);
1179         break;
1180 
1181 // tables
1182     case ID_TABLE:
1183         n = new HTMLTableElementImpl(document);
1184         break;
1185     case ID_CAPTION:
1186         n = new HTMLTableCaptionElementImpl(document);
1187         break;
1188     case ID_COLGROUP:
1189     case ID_COL:
1190         n = new HTMLTableColElementImpl(document, t->tid);
1191         break;
1192     case ID_TR:
1193         popBlock(ID_TR);
1194         n = new HTMLTableRowElementImpl(document);
1195         break;
1196     case ID_TD:
1197     case ID_TH:
1198         popBlock(ID_TH);
1199         popBlock(ID_TD);
1200         n = new HTMLTableCellElementImpl(document, t->tid);
1201         break;
1202     case ID_TBODY:
1203     case ID_THEAD:
1204     case ID_TFOOT:
1205         popBlock(ID_THEAD);
1206         popBlock(ID_TBODY);
1207         popBlock(ID_TFOOT);
1208         n = new HTMLTableSectionElementImpl(document, t->tid, false);
1209         break;
1210 
1211 // inline elements
1212     case ID_BR:
1213         n = new HTMLBRElementImpl(document);
1214         break;
1215     case ID_Q:
1216         n = new HTMLGenericElementImpl(document, t->tid);
1217         break;
1218 
1219 // elements with no special representation in the DOM
1220 
1221 // block:
1222     case ID_ADDRESS:
1223     case ID_CENTER:
1224         n = new HTMLGenericElementImpl(document, t->tid);
1225         break;
1226 // inline
1227     // %fontstyle
1228     case ID_TT:
1229     case ID_U:
1230     case ID_B:
1231     case ID_I:
1232     case ID_S:
1233     case ID_STRIKE:
1234     case ID_BIG:
1235     case ID_SMALL:
1236 
1237     // %phrase
1238     case ID_EM:
1239     case ID_STRONG:
1240     case ID_DFN:
1241     case ID_CODE:
1242     case ID_SAMP:
1243     case ID_KBD:
1244     case ID_VAR:
1245     case ID_CITE:
1246     case ID_ABBR:
1247     case ID_ACRONYM:
1248 
1249     // %special
1250     case ID_SUB:
1251     case ID_SUP:
1252     case ID_SPAN:
1253     case ID_WBR:
1254     case ID_NOBR:
1255         if (t->tid == ID_NOBR || t->tid == ID_WBR) {
1256             popOptionalBlock(t->tid);
1257         }
1258     case ID_BDO:
1259         n = new HTMLGenericElementImpl(document, t->tid);
1260         break;
1261 
1262     // these are special, and normally not rendered
1263     case ID_NOEMBED:
1264         if (!t->flat) {
1265             n = new HTMLGenericElementImpl(document, t->tid);
1266             discard_until = ID_NOEMBED + ID_CLOSE_TAG;
1267         }
1268         return n;
1269     case ID_NOFRAMES:
1270         if (!t->flat) {
1271             n = new HTMLGenericElementImpl(document, t->tid);
1272             discard_until = ID_NOFRAMES + ID_CLOSE_TAG;
1273         }
1274         return n;
1275     case ID_NOSCRIPT:
1276         if (!t->flat) {
1277             n = new HTMLGenericElementImpl(document, t->tid);
1278             if (HTMLWidget && HTMLWidget->part()->jScriptEnabled()) {
1279                 discard_until = ID_NOSCRIPT + ID_CLOSE_TAG;
1280             }
1281         }
1282         return n;
1283     case ID_NOLAYER:
1284 //        discard_until = ID_NOLAYER + ID_CLOSE_TAG;
1285         return nullptr;
1286         break;
1287     case ID_MARQUEE:
1288         n = new HTMLMarqueeElementImpl(document);
1289         break;
1290 // text
1291     case ID_TEXT:
1292 //        qCDebug(KHTML_LOG) << "ID_TEXT: \"" << DOMString(t->text).string() << "\"";
1293         n = new TextImpl(document, t->text);
1294         break;
1295     case ID_COMMENT:
1296         n = new CommentImpl(document, t->text);
1297         break;
1298     default:
1299         n = new HTMLGenericElementImpl(document, t->tid);
1300         break;
1301 //         qCDebug(KHTML_LOG) << "Unknown tag " << t->tid << "!";
1302     }
1303     return n;
1304 }
1305 
1306 void KHTMLParser::processCloseTag(Token *t)
1307 {
1308     // FIXME: the below only behaves according to "in body" insertion mode (HTML5 8.2.5.10)
1309     //    - might need fixing when we have other insertion modes.
1310     switch (t->tid) {
1311     case ID_HTML+ID_CLOSE_TAG:
1312     case ID_BODY+ID_CLOSE_TAG:
1313         // we never trust those close tags, since stupid webpages close
1314         // them prematurely
1315         return;
1316     case ID_FORM+ID_CLOSE_TAG: // needs additional error checking. See spec.
1317         form = nullptr;
1318         if (!isElementInScope(ID_FORM)) {
1319             // Parse error. Ignore.
1320             return;
1321         }
1322         // this one is to get the right style on the body element
1323         break;
1324     case ID_MAP+ID_CLOSE_TAG:
1325         map = nullptr;
1326         break;
1327     case ID_SELECT+ID_CLOSE_TAG:
1328         inSelect = false;
1329         break;
1330     case ID_TITLE+ID_CLOSE_TAG:
1331         // Set haveTitle only if <title> isn't empty
1332         if (current->firstChild()) {
1333             haveTitle = true;
1334         }
1335         break;
1336     case ID_P+ID_CLOSE_TAG:
1337         if (!isElementInScope(ID_P)) {
1338             // Parse error. Handle as if <p> had been seen.
1339             t->tid = ID_P;
1340             parseToken(t);
1341             popBlock(ID_P);
1342             return;
1343         }
1344         break;
1345     case ID_ADDRESS+ID_CLOSE_TAG:
1346 //    case ID_ARTICLE+ID_CLOSE_TAG:
1347     case ID_BLOCKQUOTE+ID_CLOSE_TAG:
1348     case ID_CENTER+ID_CLOSE_TAG:
1349 //    case ID_DATAGRID+ID_CLOSE_TAG:
1350 //    case ID_DETAILS+ID_CLOSE_TAG:
1351 //    case ID_DIALOG+ID_CLOSE_TAG:
1352     case ID_DIR+ID_CLOSE_TAG:
1353     case ID_DIV+ID_CLOSE_TAG:
1354     case ID_DL+ID_CLOSE_TAG:
1355     case ID_FIELDSET+ID_CLOSE_TAG:
1356 //    case ID_FIGURE+ID_CLOSE_TAG:
1357 //    case ID_FOOTER+ID_CLOSE_TAG:
1358 //    case ID_HEADER+ID_CLOSE_TAG:
1359     case ID_LISTING+ID_CLOSE_TAG:
1360     case ID_MENU+ID_CLOSE_TAG:
1361 //    case ID_NAV+ID_CLOSE_TAG:
1362     case ID_OL+ID_CLOSE_TAG:
1363     case ID_PRE+ID_CLOSE_TAG:
1364 //    case ID_SECTION+ID_CLOSE_TAG:
1365     case ID_UL+ID_CLOSE_TAG:
1366 
1367     case ID_DD+ID_CLOSE_TAG:
1368     case ID_DT+ID_CLOSE_TAG:
1369     case ID_LI+ID_CLOSE_TAG:
1370 
1371     case ID_APPLET+ID_CLOSE_TAG: // those four should also "Clear the list of active formatting elements
1372     case ID_BUTTON+ID_CLOSE_TAG: // up to the last marker." whenever we implement adoption agency.
1373     case ID_MARQUEE+ID_CLOSE_TAG:
1374     case ID_OBJECT+ID_CLOSE_TAG:
1375 
1376     case ID_HEAD+ID_CLOSE_TAG: // ### according to HTML5, should be treated as 'Any other end tag'
1377         //     We'll do that when proper 'Any other end tag' handling is implemented.
1378         //     In the meantime, test scoping at least (#170694)
1379 
1380         if (!isElementInScope(t->tid - ID_CLOSE_TAG)) {
1381             // Parse error. Ignore token.
1382             return;
1383         }
1384         break;
1385     case ID_H1:
1386     case ID_H2:
1387     case ID_H3:
1388     case ID_H4:
1389     case ID_H5:
1390     case ID_H6:
1391         if (!isHeadingInScope()) {
1392             // Parse error. Ignore token.
1393             return;
1394         }
1395         break;
1396     case ID_A: // Formatting elements - will need special handling - cf. HTML5 "adoption agency algorithm"
1397     case ID_B: //                       meant to replace the "residual style" handling we have now.
1398     case ID_BIG:
1399     case ID_CODE:
1400     case ID_EM:
1401     case ID_FONT:
1402     case ID_I:
1403     case ID_NOBR:
1404     case ID_S:
1405     case ID_SMALL:
1406     case ID_STRIKE:
1407     case ID_STRONG:
1408     case ID_TT:
1409     case ID_U:
1410         break;
1411 
1412     default:
1413 //      otherTag = true; // FIXME: implement 'Any other end tag' handling
1414         break;
1415     }
1416 
1417 #ifdef PARSER_DEBUG
1418     qCDebug(KHTML_LOG) << "added the following children to " << current->nodeName().string();
1419     NodeImpl *child = current->firstChild();
1420     while (child != 0) {
1421         qCDebug(KHTML_LOG) << "    " << child->nodeName().string();
1422         child = child->nextSibling();
1423     }
1424 #endif
1425 
1426     generateImpliedEndTags(t->tid - ID_CLOSE_TAG);
1427     popBlock(t->tid - ID_CLOSE_TAG);
1428 
1429 #ifdef PARSER_DEBUG
1430     qCDebug(KHTML_LOG) << "closeTag --> current = " << current->nodeName().string();
1431 #endif
1432 }
1433 
1434 bool KHTMLParser::isResidualStyleTag(int _id)
1435 {
1436     switch (_id) {
1437     case ID_A:
1438     case ID_B:
1439     case ID_BIG:
1440     case ID_EM:
1441     case ID_FONT:
1442     case ID_I:
1443     case ID_NOBR:
1444     case ID_S:
1445     case ID_SMALL:
1446     case ID_STRIKE:
1447     case ID_STRONG:
1448     case ID_TT:
1449     case ID_U:
1450     case ID_DFN:
1451     case ID_CODE:
1452     case ID_SAMP:
1453     case ID_KBD:
1454     case ID_VAR:
1455     case ID_DEL:
1456     case ID_INS:
1457         return true;
1458     default:
1459         return false;
1460     }
1461 }
1462 
1463 bool KHTMLParser::isAffectedByResidualStyle(int _id)
1464 {
1465     if (isResidualStyleTag(_id)) {
1466         return true;
1467     }
1468 
1469     switch (_id) {
1470     case ID_P:
1471     case ID_DIV:
1472     case ID_BLOCKQUOTE:
1473     case ID_ADDRESS:
1474     case ID_H1:
1475     case ID_H2:
1476     case ID_H3:
1477     case ID_H4:
1478     case ID_H5:
1479     case ID_H6:
1480     case ID_CENTER:
1481     case ID_UL:
1482     case ID_OL:
1483     case ID_LI:
1484     case ID_DL:
1485     case ID_DT:
1486     case ID_DD:
1487     case ID_PRE:
1488     case ID_LISTING:
1489         return true;
1490     default:
1491         return false;
1492     }
1493 }
1494 
/**
 * Fixes up the DOM and the block stack when the tag of stack element @p elem
 * is closed while entries with a higher level than @p elem are still open
 * above it on blockStack (e.g. <b>...<p>Foo</b>Goo</p>).
 *
 * The function returns without changing anything when
 *  - one of the higher-level entries is not isAffectedByResidualStyle(),
 *  - @p elem is not found on the stack or no higher-level entry exists, or
 *  - the DOM does not allow the block to become a child of elem's node.
 *
 * Otherwise the block is reparented, @p elem is unlinked from the stack and
 * deleted, and the residual style tags that were open inside the block are
 * reopened through reopenResidualStyleTags().
 *
 * @param elem stack element of the tag being closed; deleted on success.
 */
void KHTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem *elem)
{
    // Find the element that crosses over to a higher level.
    // ### For now, if there is more than one, we will only make sure we close the residual style.
    int exceptionCode = 0;
    HTMLStackElem *curr = blockStack;
    HTMLStackElem *maxElem = nullptr;      // last (deepest in the list) higher-level entry found before elem
    HTMLStackElem *endElem = nullptr;      // first higher-level entry found, counted from the stack top
    HTMLStackElem *prev = nullptr;         // entry visited just before curr
    HTMLStackElem *prevMaxElem = nullptr;  // entry visited just before maxElem
    bool advancedResidual = false; // ### if set we only close the residual style
    while (curr && curr != elem) {
        if (curr->level > elem->level) {
            if (!isAffectedByResidualStyle(curr->id)) {
                return;
            }
            if (maxElem) {
                // more than one higher-level entry crosses elem
                advancedResidual = true;
            } else {
                endElem = curr;
            }
            maxElem = curr;
            prevMaxElem = prev;
        }

        prev = curr;
        curr = curr->next;
    }

    // elem is not on the stack, or nothing with a higher level sits above it.
    if (!curr || !maxElem) {
        return;
    }

    // prev is the entry directly above elem on the stack here.
    NodeImpl *residualElem = prev->node;
    NodeImpl *blockElem = prevMaxElem ? prevMaxElem->node : current;
    RefPtr<NodeImpl> parentElem = elem->node;

    // Check to see if the reparenting that is going to occur is allowed according to the DOM.
    // FIXME: We should either always allow it or perform an additional fixup instead of
    // just bailing here.
    // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
    if (!parentElem->childAllowed(blockElem)) {
        return;
    }

    if (maxElem->node->parentNode() != elem->node && !advancedResidual) {
        // Walk the stack and remove any elements that aren't residual style tags.  These
        // are basically just being closed up.  Example:
        // <font><span>Moo<p>Goo</font></p>.
        // In the above example, the <span> doesn't need to be reopened.  It can just close.
        HTMLStackElem *currElem = maxElem->next;
        HTMLStackElem *prevElem = maxElem;
        while (currElem != elem) {
            HTMLStackElem *nextElem = currElem->next;
            if (!isResidualStyleTag(currElem->id)) {
                // unlink currElem; the entry before it inherits its node
                prevElem->next = nextElem;
                prevElem->setNode(currElem->node);
                delete currElem;
            } else {
                prevElem = currElem;
            }
            currElem = nextElem;
        }

        // We have to reopen residual tags in between maxElem and elem.  An example of this case is:
        // <font><i>Moo<p>Foo</font>.
        // In this case, we need to transform the part before the <p> into:
        // <font><i>Moo</i></font><i>
        // so that the <i> will remain open.  This involves the modification of elements
        // in the block stack.
        // This will also affect how we ultimately reparent the block, since we want it to end up
        // under the reopened residual tags (e.g., the <i> in the above example.)
        RefPtr<NodeImpl> prevNode = nullptr;
        RefPtr<NodeImpl> currNode = nullptr;
        currElem = maxElem;
        while (currElem->node != residualElem) {
            if (isResidualStyleTag(currElem->node->id())) {
                // Create a clone of this element.
                currNode = currElem->node->cloneNode(false);
                currElem->node->close();
                removeForbidden(currElem->id, forbiddenTag);

                // Change the stack element's node to point to the clone.
                currElem->setNode(currNode.get());

                // Attach the previous node as a child of this new node.
                if (prevNode) {
                    currNode->appendChild(prevNode.get(), exceptionCode);
                } else { // The new parent for the block element is going to be the innermost clone.
                    parentElem = currNode;
                }

                prevNode = currNode;
            }

            currElem = currElem->next;
        }

        // Now append the chain of new residual style elements if one exists.
        if (prevNode) {
            elem->node->appendChild(prevNode.get(), exceptionCode);
        }
    }

    // We need to make a clone of |residualElem| and place it just inside |blockElem|.
    // All content of |blockElem| is reparented to be under this clone.  We then
    // reparent |blockElem| using real DOM calls so that attachment/detachment will
    // be performed to fix up the rendering tree.
    // So for this example: <b>...<p>Foo</b>Goo</p>
    // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
    //
    // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
    SharedPtr<NodeImpl> guard(blockElem); // keeps |blockElem| referenced while it is out of the tree
    blockElem->parentNode()->removeChild(blockElem, exceptionCode);

    if (!advancedResidual) {
        // Step 2: Clone |residualElem|.
        RefPtr<NodeImpl> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.

        // Step 3: Place |blockElem|'s children under |newNode|.  Remove all of the children of |blockElem|
        // before we've put |newNode| into the document.  That way we'll only do one attachment of all
        // the new content (instead of a bunch of individual attachments).
        NodeImpl *currNode = blockElem->firstChild();
        while (currNode) {
            NodeImpl *nextNode = currNode->nextSibling();
            SharedPtr<NodeImpl> guard(currNode); //Protect from deletion while moving
            blockElem->removeChild(currNode, exceptionCode);
            newNode->appendChild(currNode, exceptionCode);
            currNode = nextNode;

// TODO - To be replaced.
            // Re-register form elements with currently active form, step 1 will have removed them
            // NOTE(review): currNode has already been advanced to the next sibling here, so the
            // node registered is the one that will be moved in the next iteration and the first
            // child is never re-registered - confirm whether this is intended.
            if (form && currNode && currNode->isGenericFormElement()) {
                HTMLGenericFormElementImpl *e = static_cast<HTMLGenericFormElementImpl *>(currNode);
                form->registerFormElement(e);
            }
        }

        // Step 4: Place |newNode| under |blockElem|.  |blockElem| is still out of the document, so no
        // attachment can occur yet.
        blockElem->appendChild(newNode.get(), exceptionCode);
    }

    // Step 5: Reparent |blockElem|.  Now the full attachment of the fixed up tree takes place.
    parentElem->appendChild(blockElem, exceptionCode);

    // Step 6: Elide |elem|, since it is effectively no longer open.  Also update
    // the node associated with the previous stack element so that when it gets popped,
    // it doesn't make the residual element the next current node.
    HTMLStackElem *currElem = maxElem;
    HTMLStackElem *prevElem = nullptr;
    while (currElem != elem) {
        prevElem = currElem;
        currElem = currElem->next;
    }
    prevElem->next = elem->next;
    prevElem->setNode(elem->node);
    delete elem;

    // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
    // In the above example, Goo should stay italic.
    curr = blockStack;
    HTMLStackElem *residualStyleStack = nullptr;
    while (curr && curr != endElem) {
        // We will actually schedule this tag for reopening
        // after we complete the close of this entire block.
        NodeImpl *currNode = current;
        if (isResidualStyleTag(curr->id)) {
            // We've overloaded the use of stack elements and are just reusing the
            // struct with a slightly different meaning to the variables.  Instead of chaining
            // from innermost to outermost, we build up a list of all the tags we need to reopen
            // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
            // to the outermost tag we need to reopen.
            // We also set curr->node to be the actual element that corresponds to the ID stored in
            // curr->id rather than the node that you should pop to when the element gets pulled off
            // the stack.
            popOneBlock(false);
            curr->setNode(currNode);
            curr->next = residualStyleStack;
            residualStyleStack = curr;
        } else {
            popOneBlock();
        }

        // popOneBlock() changed the stack top; continue from the new top.
        curr = blockStack;
    }

    reopenResidualStyleTags(residualStyleStack, nullptr); // FIXME: Deal with stray table content some day
    // if it becomes necessary to do so.
}
1685 
1686 void KHTMLParser::reopenResidualStyleTags(HTMLStackElem *elem, DOM::NodeImpl *malformedTableParent)
1687 {
1688     // Loop for each tag that needs to be reopened.
1689     while (elem) {
1690         // Create a shallow clone of the DOM node for this element.
1691         RefPtr<NodeImpl> newNode = elem->node->cloneNode(false);
1692 
1693         // Append the new node. In the malformed table case, we need to insert before the table,
1694         // which will be the last child.
1695         int exceptionCode = 0;
1696         if (malformedTableParent) {
1697             malformedTableParent->insertBefore(newNode.get(), malformedTableParent->lastChild(), exceptionCode);
1698         } else {
1699             current->appendChild(newNode.get(), exceptionCode);
1700         }
1701         // FIXME: Is it really OK to ignore the exceptions here?
1702 
1703         // Now push a new stack element for this node we just created.
1704         pushBlock(elem->id, elem->level);
1705 
1706         // Set our strayTableContent boolean if needed, so that the reopened tag also knows
1707         // that it is inside a malformed table.
1708         blockStack->strayTableContent = malformedTableParent != nullptr;
1709         if (blockStack->strayTableContent) {
1710             inStrayTableContent++;
1711         }
1712 
1713         // Clear our malformed table parent variable.
1714         malformedTableParent = nullptr;
1715 
1716         // Update |current| manually to point to the new node.
1717         setCurrent(newNode.get());
1718 
1719         // Advance to the next tag that needs to be reopened.
1720         HTMLStackElem *next = elem->next;
1721         delete elem;
1722         elem = next;
1723     }
1724 }
1725 
1726 void KHTMLParser::pushBlock(int _id, int _level)
1727 {
1728     HTMLStackElem *Elem = new HTMLStackElem(_id, _level, current, m_inline, blockStack);
1729 
1730     blockStack = Elem;
1731     addForbidden(_id, forbiddenTag);
1732 }
1733 
1734 void KHTMLParser::generateImpliedEndTags(int _id)
1735 {
1736     HTMLStackElem *Elem = blockStack;
1737 
1738     int level = tagPriority(_id);
1739     while (Elem && Elem->id != _id) {
1740         HTMLStackElem *NextElem = Elem->next;
1741         if (endTagRequirement(Elem->id) == DOM::OPTIONAL && Elem->level <= level) {
1742             popOneBlock();
1743         } else {
1744             break;
1745         }
1746         Elem = NextElem;
1747     }
1748 }
1749 
1750 void KHTMLParser::popOptionalBlock(int _id)
1751 {
1752     bool found = false;
1753     HTMLStackElem *Elem = blockStack;
1754 
1755     int level = tagPriority(_id);
1756     while (Elem) {
1757         if (Elem->id == _id) {
1758             found = true;
1759             break;
1760         }
1761         if (Elem->level > level || (endTagRequirement(Elem->id) != DOM::OPTIONAL && !isResidualStyleTag(Elem->id))) {
1762             break;
1763         }
1764         Elem = Elem->next;
1765     }
1766 
1767     if (found) {
1768         generateImpliedEndTags(_id);
1769         popBlock(_id);
1770     }
1771 }
1772 
1773 bool KHTMLParser::isElementInScope(int _id)
1774 {
1775     // HTML5 8.2.3.2
1776     HTMLStackElem *Elem = blockStack;
1777     while (Elem && Elem->id != _id) {
1778         if (DOM::checkIsScopeBoundary(Elem->id)) {
1779             return false;
1780         }
1781         Elem = Elem->next;
1782     }
1783     return Elem;
1784 }
1785 
1786 bool KHTMLParser::isHeadingInScope()
1787 {
1788     HTMLStackElem *Elem = blockStack;
1789     while (Elem && (Elem->id < ID_H1 || Elem->id > ID_H6)) {
1790         if (DOM::checkIsScopeBoundary(Elem->id)) {
1791             return false;
1792         }
1793         Elem = Elem->next;
1794     }
1795     return Elem;
1796 }
1797 
1798 void KHTMLParser::popBlock(int _id)
1799 {
1800     HTMLStackElem *Elem = blockStack;
1801     int maxLevel = 0;
1802 
1803 #ifdef PARSER_DEBUG
1804     qCDebug(KHTML_LOG) << "popBlock(" << getParserPrintableName(_id) << ")";
1805     while (Elem) {
1806         qCDebug(KHTML_LOG) << "   > " << getParserPrintableName(Elem->id);
1807         Elem = Elem->next;
1808     }
1809     Elem = blockStack;
1810 #endif
1811 
1812     while (Elem && (Elem->id != _id)) {
1813         if (maxLevel < Elem->level) {
1814             maxLevel = Elem->level;
1815         }
1816         Elem = Elem->next;
1817     }
1818     if (!Elem) {
1819         return;
1820     }
1821 
1822     if (maxLevel > Elem->level) {
1823         // We didn't match because the tag is in a different scope, e.g.,
1824         // <b><p>Foo</b>.  Try to correct the problem.
1825         if (!isResidualStyleTag(_id)) {
1826             return;
1827         }
1828         return handleResidualStyleCloseTagAcrossBlocks(Elem);
1829     }
1830 
1831     bool isAffectedByStyle = isAffectedByResidualStyle(Elem->id);
1832     HTMLStackElem *residualStyleStack = nullptr;
1833     NodeImpl *malformedTableParent = nullptr;
1834 
1835     Elem = blockStack;
1836 
1837     while (Elem) {
1838         if (Elem->id == _id) {
1839             int strayTable = inStrayTableContent;
1840             popOneBlock();
1841             Elem = nullptr;
1842 
1843             // This element was the root of some malformed content just inside an implicit or
1844             // explicit <tbody> or <tr>.
1845             // If we end up needing to reopen residual style tags, the root of the reopened chain
1846             // must also know that it is the root of malformed content inside a <tbody>/<tr>.
1847             if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) {
1848                 NodeImpl *curr = current;
1849                 while (curr && curr->id() != ID_TABLE) {
1850                     curr = curr->parentNode();
1851                 }
1852                 malformedTableParent = curr ? curr->parentNode() : nullptr;
1853             }
1854         } else {
1855             // Schedule this tag for reopening
1856             // after we complete the close of this entire block.
1857             NodeImpl *currNode = current;
1858             if (isAffectedByStyle && isResidualStyleTag(Elem->id)) {
1859                 // We've overloaded the use of stack elements and are just reusing the
1860                 // struct with a slightly different meaning to the variables.  Instead of chaining
1861                 // from innermost to outermost, we build up a list of all the tags we need to reopen
1862                 // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1863                 // to the outermost tag we need to reopen.
1864                 // We also set Elem->node to be the actual element that corresponds to the ID stored in
1865                 // Elem->id rather than the node that you should pop to when the element gets pulled off
1866                 // the stack.
1867                 popOneBlock(false);
1868                 Elem->next = residualStyleStack;
1869                 Elem->setNode(currNode);
1870                 residualStyleStack = Elem;
1871             } else {
1872                 popOneBlock();
1873             }
1874             Elem = blockStack;
1875         }
1876     }
1877 
1878     reopenResidualStyleTags(residualStyleStack, malformedTableParent);
1879 }
1880 
1881 void KHTMLParser::popOneBlock(bool delBlock)
1882 {
1883     HTMLStackElem *Elem = blockStack;
1884 
1885     // we should never get here, but some bad html might cause it.
1886 #ifndef PARSER_DEBUG
1887     if (!Elem) {
1888         return;
1889     }
1890 #else
1891     qCDebug(KHTML_LOG) << "popping block: " << getParserPrintableName(Elem->id) << "(" << Elem->id << ")";
1892 #endif
1893 
1894 #if SPEED_DEBUG < 1
1895     if ((Elem->node != current)) {
1896         if (current->maintainsState() && document) {
1897             document->registerMaintainsState(current);
1898             document->attemptRestoreState(current);
1899         }
1900         current->close();
1901     }
1902 #endif
1903 
1904     removeForbidden(Elem->id, forbiddenTag);
1905 
1906     blockStack = Elem->next;
1907     // we only set inline to false, if the element we close is a block level element.
1908     // This helps getting cases as <p><b>bla</b> <b>bla</b> right.
1909 
1910     m_inline = Elem->m_inline;
1911 
1912     if (current->id() == ID_FORM && form && inStrayTableContent) {
1913         form->setMalformed(true);
1914     }
1915 
1916     setCurrent(Elem->node);
1917 
1918     if (Elem->strayTableContent) {
1919         inStrayTableContent--;
1920     }
1921 
1922     if (delBlock) {
1923         delete Elem;
1924     }
1925 }
1926 
1927 void KHTMLParser::popInlineBlocks()
1928 {
1929     while (blockStack && current->isInline() && current->id() != ID_FONT) {
1930         popOneBlock();
1931     }
1932 }
1933 
1934 void KHTMLParser::freeBlock()
1935 {
1936     while (blockStack) {
1937         popOneBlock();
1938     }
1939     blockStack = nullptr;
1940 }
1941 
1942 void KHTMLParser::createHead()
1943 {
1944     if (head || !doc()->documentElement()) {
1945         return;
1946     }
1947 
1948     head = new HTMLHeadElementImpl(document);
1949     HTMLElementImpl *body = doc()->body();
1950     int exceptioncode = 0;
1951     doc()->documentElement()->insertBefore(head.get(), body, exceptioncode);
1952     if (exceptioncode) {
1953 #ifdef PARSER_DEBUG
1954         qCDebug(KHTML_LOG) << "creation of head failed!!!!:" << exceptioncode;
1955 #endif
1956         delete head.get();
1957         head = nullptr;
1958     }
1959 
1960     // If the body does not exist yet, then the <head> should be pushed as the current block.
1961     if (head && !body) {
1962         pushBlock(head->id(), tagPriority(head->id()));
1963         setCurrent(head.get());
1964     }
1965 }
1966 
1967 NodeImpl *KHTMLParser::handleIsindex(Token *t)
1968 {
1969     NodeImpl *n;
1970     HTMLFormElementImpl *myform = form;
1971     if (!myform) {
1972         myform = new HTMLFormElementImpl(document, true);
1973         n = myform;
1974     } else {
1975         n = new HTMLDivElementImpl(document, ID_DIV);
1976     }
1977     NodeImpl *child = new HTMLHRElementImpl(document);
1978     n->addChild(child);
1979     DOMStringImpl *a = t->attrs ? t->attrs->getValue(ATTR_PROMPT) : nullptr;
1980     DOMString text = i18n("This is a searchable index. Enter search keywords: ");
1981     if (a) {
1982         text = a;
1983     }
1984     child = new TextImpl(document, text.implementation());
1985     n->addChild(child);
1986     child = new HTMLIsIndexElementImpl(document, myform);
1987     static_cast<ElementImpl *>(child)->setAttribute(ATTR_TYPE, "khtml_isindex");
1988     n->addChild(child);
1989     child = new HTMLHRElementImpl(document);
1990     n->addChild(child);
1991 
1992     return n;
1993 }
1994 
1995 void KHTMLParser::startBody()
1996 {
1997     if (inBody) {
1998         return;
1999     }
2000 
2001     inBody = true;
2002 
2003     if (isindex) {
2004         insertNode(isindex, true /* don't decend into this node */);
2005         isindex = nullptr;
2006     }
2007 }