// File indexing completed on 2024-05-05 16:10:17
0001 /* 0002 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 0003 * (C) 2008 Germain Garand <germain@ebooksfrance.org> 0004 * 0005 * Redistribution and use in source and binary forms, with or without 0006 * modification, are permitted provided that the following conditions 0007 * are met: 0008 * 1. Redistributions of source code must retain the above copyright 0009 * notice, this list of conditions and the following disclaimer. 0010 * 2. Redistributions in binary form must reproduce the above copyright 0011 * notice, this list of conditions and the following disclaimer in the 0012 * documentation and/or other materials provided with the distribution. 0013 * 0014 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 0015 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 0016 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 0017 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 0018 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 0019 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 0020 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 0021 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 0022 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 0023 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 0024 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
0025 */ 0026 0027 #include "htmlprospectivetokenizer.h" 0028 0029 #include <QTime> 0030 #include <QVarLengthArray> 0031 0032 #include "html_headimpl.h" 0033 #include "html_documentimpl.h" 0034 #include "htmlparser.h" 0035 #include "dtd.h" 0036 0037 #include <misc/loader.h> 0038 #include <khtmlview.h> 0039 #include <khtml_part.h> 0040 #include <xml/dom_docimpl.h> 0041 #include <css/csshelper.h> 0042 #include <ecma/kjs_proxy.h> 0043 #include <ctype.h> 0044 #include <assert.h> 0045 #include <QVariant> 0046 #include <stdlib.h> 0047 0048 #include "kentities_p.h" 0049 0050 #define PRELOAD_DEBUG 0 0051 0052 #define U16_TRAIL(sup) (ushort)(((sup)&0x3ff)|0xdc00) 0053 #define U16_LEAD(sup) (ushort)(((sup)>>10)+0xd7c0) 0054 0055 using namespace khtml; 0056 0057 ProspectiveTokenizer::ProspectiveTokenizer(DOM::DocumentImpl *doc) 0058 : m_inProgress(false) 0059 , m_tagName(32) 0060 , m_attributeName(32) 0061 , m_attributeValue(255) 0062 , m_cssRule(16) 0063 , m_cssRuleValue(255) 0064 , m_timeUsed(0) 0065 , m_document(doc) 0066 { 0067 #if PRELOAD_DEBUG 0068 qCDebug(KHTML_LOG) << "CREATING PRELOAD SCANNER FOR" << m_document << m_document->URL().toDisplayString(); 0069 #endif 0070 } 0071 0072 ProspectiveTokenizer::~ProspectiveTokenizer() 0073 { 0074 #if PRELOAD_DEBUG 0075 fprintf(stderr, "DELETING PRELOAD SCANNER FOR %p\n", m_document); 0076 fprintf(stderr, "TOTAL TIME USED %dms\n", m_timeUsed); 0077 #endif 0078 } 0079 0080 void ProspectiveTokenizer::begin() 0081 { 0082 assert(!m_inProgress); 0083 reset(); 0084 m_inProgress = true; 0085 } 0086 0087 void ProspectiveTokenizer::end() 0088 { 0089 assert(m_inProgress); 0090 m_inProgress = false; 0091 } 0092 0093 void ProspectiveTokenizer::reset() 0094 { 0095 m_source.clear(); 0096 0097 m_state = Data; 0098 m_escape = false; 0099 m_contentModel = PCDATA; 0100 m_commentPos = 0; 0101 0102 m_closeTag = false; 0103 m_tagName.clear(); 0104 m_attributeName.clear(); 0105 m_attributeValue.clear(); 0106 m_lastStartTag.clear(); 0107 
m_lastStartTagId = 0; 0108 0109 m_urlToLoad = DOMString(); 0110 m_linkIsStyleSheet = false; 0111 m_lastCharacterIndex = 0; 0112 clearLastCharacters(); 0113 0114 m_cssState = CSSInitial; 0115 m_cssRule.clear(); 0116 m_cssRuleValue.clear(); 0117 } 0118 0119 void ProspectiveTokenizer::write(const TokenizerString &source) 0120 { 0121 #if PRELOAD_DEBUG 0122 QTime t; 0123 t.start(); 0124 #endif 0125 0126 tokenize(source); 0127 0128 #if PRELOAD_DEBUG 0129 m_timeUsed += t.elapsed(); 0130 #endif 0131 } 0132 0133 static inline bool isWhitespace(const QChar &c) 0134 { 0135 unsigned short u = c.unicode(); 0136 if (u > 0x20) { 0137 return false; 0138 } 0139 return u == ' ' || u == '\n' || u == '\r' || u == '\t'; 0140 } 0141 0142 inline void ProspectiveTokenizer::clearLastCharacters() 0143 { 0144 memset(m_lastCharacters, 0, lastCharactersBufferSize * sizeof(QChar)); 0145 } 0146 0147 inline void ProspectiveTokenizer::rememberCharacter(QChar c) 0148 { 0149 m_lastCharacterIndex = (m_lastCharacterIndex + 1) % lastCharactersBufferSize; 0150 m_lastCharacters[m_lastCharacterIndex] = c; 0151 } 0152 0153 inline bool ProspectiveTokenizer::lastCharactersMatch(const char *chars, unsigned count) const 0154 { 0155 unsigned pos = m_lastCharacterIndex; 0156 while (count) { 0157 if (chars[count - 1] != m_lastCharacters[pos]) { 0158 return false; 0159 } 0160 --count; 0161 if (!pos) { 0162 pos = lastCharactersBufferSize; 0163 } 0164 --pos; 0165 } 0166 return true; 0167 } 0168 0169 static inline unsigned legalEntityFor(unsigned value) 0170 { 0171 // FIXME There is a table for more exceptions in the HTML5 specification. 
0172 if (value == 0 || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF)) { 0173 return 0xFFFD; 0174 } 0175 return value; 0176 } 0177 0178 unsigned ProspectiveTokenizer::consumeEntity(TokenizerString &source, bool ¬EnoughCharacters) 0179 { 0180 enum EntityState { 0181 Initial, 0182 NumberType, 0183 MaybeHex, 0184 Hex, 0185 Decimal, 0186 Named 0187 }; 0188 EntityState entityState = Initial; 0189 unsigned result = 0; 0190 QVarLengthArray<QChar> seenChars; 0191 QVarLengthArray<char> entityName; 0192 0193 while (!source.isEmpty()) { 0194 seenChars.append(*source); 0195 ushort cc = source->unicode(); 0196 switch (entityState) { 0197 case Initial: 0198 if (isWhitespace(cc) || cc == '<' || cc == '&') { 0199 return 0; 0200 } else if (cc == '#') { 0201 entityState = NumberType; 0202 } else if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) { 0203 entityName.append(cc); 0204 entityState = Named; 0205 } else { 0206 return 0; 0207 } 0208 break; 0209 case NumberType: 0210 if (cc == 'x' || cc == 'X') { 0211 entityState = MaybeHex; 0212 } else if (cc >= '0' && cc <= '9') { 0213 entityState = Decimal; 0214 result = cc - '0'; 0215 } else { 0216 source.push('#'); 0217 return 0; 0218 } 0219 break; 0220 case MaybeHex: 0221 if (cc >= '0' && cc <= '9') { 0222 result = cc - '0'; 0223 } else if (cc >= 'a' && cc <= 'f') { 0224 result = 10 + cc - 'a'; 0225 } else if (cc >= 'A' && cc <= 'F') { 0226 result = 10 + cc - 'A'; 0227 } else { 0228 source.push(seenChars[1]); 0229 source.push('#'); 0230 return 0; 0231 } 0232 entityState = Hex; 0233 break; 0234 case Hex: 0235 if (cc >= '0' && cc <= '9') { 0236 result = result * 16 + cc - '0'; 0237 } else if (cc >= 'a' && cc <= 'f') { 0238 result = result * 16 + 10 + cc - 'a'; 0239 } else if (cc >= 'A' && cc <= 'F') { 0240 result = result * 16 + 10 + cc - 'A'; 0241 } else if (cc == ';') { 0242 source.advance(); 0243 return legalEntityFor(result); 0244 } else { 0245 return legalEntityFor(result); 0246 } 0247 break; 0248 case Decimal: 0249 
if (cc >= '0' && cc <= '9') { 0250 result = result * 10 + cc - '0'; 0251 } else if (cc == ';') { 0252 source.advance(); 0253 return legalEntityFor(result); 0254 } else { 0255 return legalEntityFor(result); 0256 } 0257 break; 0258 case Named: 0259 // This is the attribute only version, generic version matches somewhat differently 0260 while (entityName.size() <= 8) { 0261 if (cc == ';') { 0262 int code; 0263 const bool found = kde_findEntity(entityName.data(), entityName.size(), &code); 0264 if (found) { 0265 source.advance(); 0266 return code; 0267 } 0268 break; 0269 } 0270 if (!(cc >= 'a' && cc <= 'z') && !(cc >= 'A' && cc <= 'Z') && !(cc >= '0' && cc <= '9')) { 0271 int code; 0272 const bool found = kde_findEntity(entityName.data(), entityName.size(), &code); 0273 if (found) { 0274 return code; 0275 } 0276 break; 0277 } 0278 entityName.append(cc); 0279 source.advance(); 0280 if (source.isEmpty()) { 0281 goto outOfCharacters; 0282 } 0283 cc = source->unicode(); 0284 seenChars.append(cc); 0285 } 0286 if (seenChars.size() == 2) { 0287 source.push(seenChars[0]); 0288 } else if (seenChars.size() == 3) { 0289 source.push(seenChars[1]); 0290 source.push(seenChars[0]); 0291 } else { 0292 source.prepend(TokenizerString(QString(seenChars.data(), seenChars.size() - 1))); 0293 } 0294 return 0; 0295 } 0296 source.advance(); 0297 } 0298 outOfCharacters: 0299 notEnoughCharacters = true; 0300 source.prepend(TokenizerString(QString(seenChars.data(), seenChars.size()))); 0301 return 0; 0302 } 0303 0304 void ProspectiveTokenizer::tokenize(const TokenizerString &source) 0305 { 0306 assert(m_inProgress); 0307 0308 m_source.append(source); 0309 0310 // This is a simplified HTML5 Tokenizer 0311 // https://html.spec.whatwg.org/#tokenization 0312 while (!m_source.isEmpty()) { 0313 ushort cc = m_source->unicode(); 0314 switch (m_state) { 0315 case Data: 0316 while (1) { 0317 rememberCharacter(cc); 0318 if (cc == '&') { 0319 if (m_contentModel == PCDATA || m_contentModel == RCDATA) { 0320 
m_state = EntityData; 0321 break; 0322 } 0323 } else if (cc == '-') { 0324 if ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape) { 0325 if (lastCharactersMatch("<!--", 4)) { 0326 m_escape = true; 0327 } 0328 } 0329 } else if (cc == '<') { 0330 if (m_contentModel == PCDATA || ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape)) { 0331 m_state = TagOpen; 0332 break; 0333 } 0334 } else if (cc == '>') { 0335 if ((m_contentModel == RCDATA || m_contentModel == CDATA) && m_escape) { 0336 if (lastCharactersMatch("-->", 3)) { 0337 m_escape = false; 0338 } 0339 } 0340 } 0341 emitCharacter(cc); 0342 m_source.advance(); 0343 if (m_source.isEmpty()) { 0344 return; 0345 } 0346 cc = m_source->unicode(); 0347 } 0348 break; 0349 case EntityData: 0350 // should try to consume the entity but we only care about entities in attributes 0351 m_state = Data; 0352 break; 0353 case TagOpen: 0354 if (m_contentModel == RCDATA || m_contentModel == CDATA) { 0355 if (cc == '/') { 0356 m_state = CloseTagOpen; 0357 } else { 0358 m_state = Data; 0359 continue; 0360 } 0361 } else if (m_contentModel == PCDATA) { 0362 if (cc == '!') { 0363 m_state = MarkupDeclarationOpen; 0364 } else if (cc == '/') { 0365 m_state = CloseTagOpen; 0366 } else if (cc >= 'A' && cc <= 'Z') { 0367 m_tagName.clear(); 0368 m_tagName.append(cc + 0x20); 0369 m_closeTag = false; 0370 m_state = TagName; 0371 } else if (cc >= 'a' && cc <= 'z') { 0372 m_tagName.clear(); 0373 m_tagName.append(cc); 0374 m_closeTag = false; 0375 m_state = TagName; 0376 } else if (cc == '>') { 0377 m_state = Data; 0378 } else if (cc == '?') { 0379 m_state = BogusComment; 0380 } else { 0381 m_state = Data; 0382 continue; 0383 } 0384 } 0385 break; 0386 case CloseTagOpen: 0387 if (m_contentModel == RCDATA || m_contentModel == CDATA) { 0388 if (!m_lastStartTag.size()) { 0389 m_state = Data; 0390 continue; 0391 } 0392 if ((unsigned)m_source.length() < m_lastStartTag.size() + 1) { 0393 return; 0394 } 0395 QVector<QChar> 
tmpString; 0396 QChar tmpChar = 0; 0397 bool match = true; 0398 for (unsigned n = 0; n < m_lastStartTag.size() + 1; n++) { 0399 tmpChar = m_source->toLower(); 0400 if (n < m_lastStartTag.size() && tmpChar != m_lastStartTag[n]) { 0401 match = false; 0402 } 0403 tmpString.append(tmpChar); 0404 m_source.advance(); 0405 } 0406 m_source.prepend(TokenizerString(QString(tmpString.data(), tmpString.size()))); 0407 if (!match || (!isWhitespace(tmpChar) && tmpChar != '>' && tmpChar != '/')) { 0408 m_state = Data; 0409 continue; 0410 } 0411 } 0412 if (cc >= 'A' && cc <= 'Z') { 0413 m_tagName.clear(); 0414 m_tagName.append(cc + 0x20); 0415 m_closeTag = true; 0416 m_state = TagName; 0417 } else if (cc >= 'a' && cc <= 'z') { 0418 m_tagName.clear(); 0419 m_tagName.append(cc); 0420 m_closeTag = true; 0421 m_state = TagName; 0422 } else if (cc == '>') { 0423 m_state = Data; 0424 } else { 0425 m_state = BogusComment; 0426 } 0427 break; 0428 case TagName: 0429 while (1) { 0430 if (isWhitespace(cc)) { 0431 m_state = BeforeAttributeName; 0432 break; 0433 } 0434 if (cc == '>') { 0435 emitTag(); 0436 m_state = Data; 0437 break; 0438 } 0439 if (cc == '/') { 0440 m_state = BeforeAttributeName; 0441 break; 0442 } 0443 if (cc >= 'A' && cc <= 'Z') { 0444 m_tagName.append(cc + 0x20); 0445 } else { 0446 m_tagName.append(cc); 0447 } 0448 m_source.advance(); 0449 if (m_source.isEmpty()) { 0450 return; 0451 } 0452 cc = m_source->unicode(); 0453 } 0454 break; 0455 case BeforeAttributeName: 0456 if (isWhitespace(cc)) 0457 ; 0458 else if (cc == '>') { 0459 emitTag(); 0460 m_state = Data; 0461 } else if (cc >= 'A' && cc <= 'Z') { 0462 m_attributeName.clear(); 0463 m_attributeValue.clear(); 0464 m_attributeName.append(cc + 0x20); 0465 m_state = AttributeName; 0466 } else if (cc == '/') 0467 ; 0468 else { 0469 m_attributeName.clear(); 0470 m_attributeValue.clear(); 0471 m_attributeName.append(cc); 0472 m_state = AttributeName; 0473 } 0474 break; 0475 case AttributeName: 0476 while (1) { 0477 if 
(isWhitespace(cc)) { 0478 m_state = AfterAttributeName; 0479 break; 0480 } 0481 if (cc == '=') { 0482 m_state = BeforeAttributeValue; 0483 break; 0484 } 0485 if (cc == '>') { 0486 emitTag(); 0487 m_state = Data; 0488 break; 0489 } 0490 if (cc == '/') { 0491 m_state = BeforeAttributeName; 0492 break; 0493 } 0494 if (cc >= 'A' && cc <= 'Z') { 0495 m_attributeName.append(cc + 0x20); 0496 } else { 0497 m_attributeName.append(cc); 0498 } 0499 m_source.advance(); 0500 if (m_source.isEmpty()) { 0501 return; 0502 } 0503 cc = m_source->unicode(); 0504 } 0505 break; 0506 case AfterAttributeName: 0507 if (isWhitespace(cc)) 0508 ; 0509 else if (cc == '=') { 0510 m_state = BeforeAttributeValue; 0511 } else if (cc == '>') { 0512 emitTag(); 0513 m_state = Data; 0514 } else if (cc >= 'A' && cc <= 'Z') { 0515 m_attributeName.clear(); 0516 m_attributeValue.clear(); 0517 m_attributeName.append(cc + 0x20); 0518 m_state = AttributeName; 0519 } else if (cc == '/') { 0520 m_state = BeforeAttributeName; 0521 } else { 0522 m_attributeName.clear(); 0523 m_attributeValue.clear(); 0524 m_attributeName.append(cc); 0525 m_state = AttributeName; 0526 } 0527 break; 0528 case BeforeAttributeValue: 0529 if (isWhitespace(cc)) 0530 ; 0531 else if (cc == '"') { 0532 m_state = AttributeValueDoubleQuoted; 0533 } else if (cc == '&') { 0534 m_state = AttributeValueUnquoted; 0535 continue; 0536 } else if (cc == '\'') { 0537 m_state = AttributeValueSingleQuoted; 0538 } else if (cc == '>') { 0539 emitTag(); 0540 m_state = Data; 0541 } else { 0542 m_attributeValue.append(cc); 0543 m_state = AttributeValueUnquoted; 0544 } 0545 break; 0546 case AttributeValueDoubleQuoted: 0547 while (1) { 0548 if (cc == '"') { 0549 processAttribute(); 0550 m_state = BeforeAttributeName; 0551 break; 0552 } 0553 if (cc == '&') { 0554 m_stateBeforeEntityInAttributeValue = m_state; 0555 m_state = EntityInAttributeValue; 0556 break; 0557 } 0558 m_attributeValue.append(cc); 0559 m_source.advance(); 0560 if (m_source.isEmpty()) { 0561 
return; 0562 } 0563 cc = m_source->unicode(); 0564 } 0565 break; 0566 case AttributeValueSingleQuoted: 0567 while (1) { 0568 if (cc == '\'') { 0569 processAttribute(); 0570 m_state = BeforeAttributeName; 0571 break; 0572 } 0573 if (cc == '&') { 0574 m_stateBeforeEntityInAttributeValue = m_state; 0575 m_state = EntityInAttributeValue; 0576 break; 0577 } 0578 m_attributeValue.append(cc); 0579 m_source.advance(); 0580 if (m_source.isEmpty()) { 0581 return; 0582 } 0583 cc = m_source->unicode(); 0584 } 0585 break; 0586 case AttributeValueUnquoted: 0587 while (1) { 0588 if (isWhitespace(cc)) { 0589 processAttribute(); 0590 m_state = BeforeAttributeName; 0591 break; 0592 } 0593 if (cc == '&') { 0594 m_stateBeforeEntityInAttributeValue = m_state; 0595 m_state = EntityInAttributeValue; 0596 break; 0597 } 0598 if (cc == '>') { 0599 processAttribute(); 0600 emitTag(); 0601 m_state = Data; 0602 break; 0603 } 0604 m_attributeValue.append(cc); 0605 m_source.advance(); 0606 if (m_source.isEmpty()) { 0607 return; 0608 } 0609 cc = m_source->unicode(); 0610 } 0611 break; 0612 case EntityInAttributeValue: { 0613 bool notEnoughCharacters = false; 0614 unsigned entity = consumeEntity(m_source, notEnoughCharacters); 0615 if (notEnoughCharacters) { 0616 return; 0617 } 0618 if (entity > 0xFFFF) { 0619 m_attributeValue.append(U16_LEAD(entity)); 0620 m_attributeValue.append(U16_TRAIL(entity)); 0621 } else if (entity) { 0622 m_attributeValue.append(entity); 0623 } else { 0624 m_attributeValue.append('&'); 0625 } 0626 } 0627 m_state = m_stateBeforeEntityInAttributeValue; 0628 continue; 0629 case BogusComment: 0630 while (1) { 0631 if (cc == '>') { 0632 m_state = Data; 0633 break; 0634 } 0635 m_source.advance(); 0636 if (m_source.isEmpty()) { 0637 return; 0638 } 0639 cc = m_source->unicode(); 0640 } 0641 break; 0642 case MarkupDeclarationOpen: { 0643 if (cc == '-') { 0644 if (m_source.length() < 2) { 0645 return; 0646 } 0647 m_source.advance(); 0648 cc = m_source->unicode(); 0649 if (cc == 
'-') { 0650 m_state = CommentStart; 0651 } else { 0652 m_state = BogusComment; 0653 continue; 0654 } 0655 // If we cared about the DOCTYPE we would test to enter those states here 0656 } else { 0657 m_state = BogusComment; 0658 continue; 0659 } 0660 break; 0661 } 0662 case CommentStart: 0663 if (cc == '-') { 0664 m_state = CommentStartDash; 0665 } else if (cc == '>') { 0666 m_state = Data; 0667 } else { 0668 m_state = Comment; 0669 } 0670 break; 0671 case CommentStartDash: 0672 if (cc == '-') { 0673 m_state = CommentEnd; 0674 } else if (cc == '>') { 0675 m_state = Data; 0676 } else { 0677 m_state = Comment; 0678 } 0679 break; 0680 case Comment: 0681 while (1) { 0682 if (cc == '-') { 0683 m_state = CommentEndDash; 0684 break; 0685 } 0686 m_source.advance(); 0687 if (m_source.isEmpty()) { 0688 return; 0689 } 0690 cc = m_source->unicode(); 0691 } 0692 break; 0693 case CommentEndDash: 0694 if (cc == '-') { 0695 m_state = CommentEnd; 0696 } else { 0697 m_state = Comment; 0698 } 0699 break; 0700 case CommentEnd: 0701 if (cc == '>') { 0702 m_state = Data; 0703 } else if (cc == '-') 0704 ; 0705 else { 0706 m_state = Comment; 0707 } 0708 break; 0709 } 0710 m_source.advance(); 0711 } 0712 } 0713 0714 void ProspectiveTokenizer::processAttribute() 0715 { 0716 DOMStringImpl tagNameDS(DOMStringImpl::ShallowCopy, m_tagName.data(), m_tagName.size()); 0717 LocalName tagLocal = LocalName::fromString(&tagNameDS, IDS_NormalizeLower); 0718 uint tag = tagLocal.id(); 0719 0720 switch (tag) { 0721 case ID_SCRIPT: 0722 case ID_IMAGE: 0723 case ID_IMG: { 0724 DOMStringImpl attrDS(DOMStringImpl::ShallowCopy, m_attributeName.data(), m_attributeName.size()); 0725 LocalName attrLocal = LocalName::fromString(&attrDS, IDS_NormalizeLower); 0726 uint attribute = attrLocal.id(); 0727 if (attribute == localNamePart(ATTR_SRC) && m_urlToLoad.isEmpty()) { 0728 m_urlToLoad = DOMString(m_attributeValue.data(), m_attributeValue.size()).trimSpaces(); 0729 } 0730 break; 0731 } 0732 case ID_LINK: { 0733 
DOMStringImpl attrDS(DOMStringImpl::ShallowCopy, m_attributeName.data(), m_attributeName.size()); 0734 LocalName attrLocal = LocalName::fromString(&attrDS, IDS_NormalizeLower); 0735 uint attribute = attrLocal.id(); 0736 if (attribute == localNamePart(ATTR_HREF) && m_urlToLoad.isEmpty()) { 0737 m_urlToLoad = DOMString(m_attributeValue.data(), m_attributeValue.size()).trimSpaces(); 0738 } else if (attribute == localNamePart(ATTR_REL)) { 0739 DOMStringImpl *lowerAttribute = DOMStringImpl(DOMStringImpl::ShallowCopy, m_attributeValue.data(), m_attributeValue.size()).lower(); 0740 QString val = lowerAttribute->string(); 0741 delete lowerAttribute; 0742 m_linkIsStyleSheet = val.contains("stylesheet") && !val.contains("alternate") && !val.contains("icon"); 0743 } 0744 } 0745 default: 0746 break; 0747 } 0748 } 0749 0750 inline void ProspectiveTokenizer::emitCharacter(QChar c) 0751 { 0752 if (m_contentModel == CDATA && m_lastStartTagId == ID_STYLE) { 0753 tokenizeCSS(c); 0754 } 0755 } 0756 0757 inline void ProspectiveTokenizer::tokenizeCSS(QChar c) 0758 { 0759 // We are just interested in @import rules, no need for real tokenization here 0760 // Searching for other types of resources is probably low payoff 0761 switch (m_cssState) { 0762 case CSSInitial: 0763 if (c == '@') { 0764 m_cssState = CSSRuleStart; 0765 } else if (c == '/') { 0766 m_cssState = CSSMaybeComment; 0767 } 0768 break; 0769 case CSSMaybeComment: 0770 if (c == '*') { 0771 m_cssState = CSSComment; 0772 } else { 0773 m_cssState = CSSInitial; 0774 } 0775 break; 0776 case CSSComment: 0777 if (c == '*') { 0778 m_cssState = CSSMaybeCommentEnd; 0779 } 0780 break; 0781 case CSSMaybeCommentEnd: 0782 if (c == '/') { 0783 m_cssState = CSSInitial; 0784 } else if (c == '*') 0785 ; 0786 else { 0787 m_cssState = CSSComment; 0788 } 0789 break; 0790 case CSSRuleStart: 0791 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 0792 m_cssRule.clear(); 0793 m_cssRuleValue.clear(); 0794 m_cssRule.append(c); 0795 m_cssState = 
CSSRule; 0796 } else { 0797 m_cssState = CSSInitial; 0798 } 0799 break; 0800 case CSSRule: 0801 if (isWhitespace(c)) { 0802 m_cssState = CSSAfterRule; 0803 } else if (c == ';') { 0804 m_cssState = CSSInitial; 0805 } else { 0806 m_cssRule.append(c); 0807 } 0808 break; 0809 case CSSAfterRule: 0810 if (isWhitespace(c)) 0811 ; 0812 else if (c == ';') { 0813 m_cssState = CSSInitial; 0814 } else { 0815 m_cssState = CSSRuleValue; 0816 m_cssRuleValue.append(c); 0817 } 0818 break; 0819 case CSSRuleValue: 0820 if (isWhitespace(c)) { 0821 m_cssState = CSSAferRuleValue; 0822 } else if (c == ';') { 0823 emitCSSRule(); 0824 m_cssState = CSSInitial; 0825 } else { 0826 m_cssRuleValue.append(c); 0827 } 0828 break; 0829 case CSSAferRuleValue: 0830 if (isWhitespace(c)) 0831 ; 0832 else if (c == ';') { 0833 emitCSSRule(); 0834 m_cssState = CSSInitial; 0835 } else { 0836 // FIXME media rules 0837 m_cssState = CSSInitial; 0838 } 0839 break; 0840 } 0841 } 0842 0843 void ProspectiveTokenizer::emitTag() 0844 { 0845 if (m_closeTag) { 0846 m_contentModel = PCDATA; 0847 m_cssState = CSSInitial; 0848 clearLastCharacters(); 0849 return; 0850 } 0851 0852 DOMStringImpl tagNameDS(DOMStringImpl::ShallowCopy, m_tagName.data(), m_tagName.size()); 0853 LocalName tagLocal = LocalName::fromString(&tagNameDS, IDS_NormalizeLower); 0854 uint tag = tagLocal.id(); 0855 m_lastStartTagId = tag; 0856 m_lastStartTag = m_tagName; 0857 0858 switch (tag) { 0859 case ID_TEXTAREA: 0860 case ID_TITLE: 0861 m_contentModel = RCDATA; 0862 break; 0863 case ID_STYLE: 0864 case ID_XMP: 0865 case ID_SCRIPT: 0866 case ID_IFRAME: 0867 case ID_NOEMBED: 0868 case ID_NOFRAMES: 0869 m_contentModel = CDATA; 0870 break; 0871 case ID_NOSCRIPT: 0872 // we wouldn't be here if scripts were disabled 0873 m_contentModel = CDATA; 0874 break; 0875 case ID_PLAINTEXT: 0876 m_contentModel = PLAINTEXT; 0877 break; 0878 default: 0879 m_contentModel = PCDATA; 0880 } 0881 0882 if (m_urlToLoad.isEmpty()) { 0883 m_linkIsStyleSheet = false; 0884 
return; 0885 } 0886 0887 CachedObject *o = nullptr; 0888 if (tag == ID_SCRIPT) { 0889 o = m_document->docLoader()->requestScript(m_urlToLoad, m_document->part()->encoding()); 0890 } else if (tag == ID_IMAGE || tag == ID_IMG) { 0891 o = m_document->docLoader()->requestImage(m_urlToLoad); 0892 } else if (tag == ID_LINK && m_linkIsStyleSheet) { 0893 o = m_document->docLoader()->requestStyleSheet(m_urlToLoad, m_document->part()->encoding()); 0894 } 0895 0896 if (o) { 0897 m_document->docLoader()->registerPreload(o); 0898 } 0899 0900 m_urlToLoad = DOMString(); 0901 m_linkIsStyleSheet = false; 0902 } 0903 0904 void ProspectiveTokenizer::emitCSSRule() 0905 { 0906 QString rule(m_cssRule.data(), m_cssRule.size()); 0907 if (rule.toLower() == "import" && !m_cssRuleValue.isEmpty()) { 0908 DOMString value = DOMString(m_cssRuleValue.data(), m_cssRuleValue.size()); 0909 DOMString url = parseURL(value); 0910 if (!url.isEmpty()) { 0911 m_document->docLoader()->registerPreload(m_document->docLoader()->requestStyleSheet(url, m_document->part()->encoding())); // #### charset 0912 } 0913 } 0914 m_cssRule.clear(); 0915 m_cssRuleValue.clear(); 0916 } 0917