File indexing completed on 2024-05-05 16:10:17
/*
 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0024 */ 0025 0026 #ifndef HTMLPROSPECTIVETOKENIZER_H 0027 #define HTMLPROSPECTIVETOKENIZER_H 0028 0029 #include "misc/stringit.h" 0030 #include <wtf/Vector.h> 0031 0032 namespace DOM 0033 { 0034 class DocumentImpl; 0035 } 0036 0037 namespace khtml 0038 { 0039 0040 class CachedObject; 0041 class CachedObjectClient; 0042 0043 class ProspectiveTokenizer 0044 { 0045 public: 0046 ProspectiveTokenizer(DOM::DocumentImpl *); 0047 ~ProspectiveTokenizer(); 0048 void begin(); 0049 void write(const khtml::TokenizerString &); 0050 void end(); 0051 bool inProgress() const 0052 { 0053 return m_inProgress; 0054 } 0055 0056 static unsigned consumeEntity(khtml::TokenizerString &, bool ¬EnoughCharacters); 0057 0058 private: 0059 void tokenize(const khtml::TokenizerString &); 0060 void reset(); 0061 0062 void emitTag(); 0063 void emitCharacter(QChar); 0064 0065 void tokenizeCSS(QChar); 0066 void emitCSSRule(); 0067 0068 void processAttribute(); 0069 0070 void clearLastCharacters(); 0071 void rememberCharacter(QChar); 0072 bool lastCharactersMatch(const char *, unsigned count) const; 0073 0074 bool m_inProgress; 0075 khtml::TokenizerString m_source; 0076 0077 enum State { 0078 Data, 0079 EntityData, 0080 TagOpen, 0081 CloseTagOpen, 0082 TagName, 0083 BeforeAttributeName, 0084 AttributeName, 0085 AfterAttributeName, 0086 BeforeAttributeValue, 0087 AttributeValueDoubleQuoted, 0088 AttributeValueSingleQuoted, 0089 AttributeValueUnquoted, 0090 EntityInAttributeValue, 0091 BogusComment, 0092 MarkupDeclarationOpen, 0093 CommentStart, 0094 CommentStartDash, 0095 Comment, 0096 CommentEndDash, 0097 CommentEnd 0098 }; 0099 State m_state; 0100 bool m_escape; 0101 enum ContentModel { 0102 PCDATA, 0103 RCDATA, 0104 CDATA, 0105 PLAINTEXT 0106 }; 0107 ContentModel m_contentModel; 0108 unsigned m_commentPos; 0109 State m_stateBeforeEntityInAttributeValue; 0110 0111 static const unsigned lastCharactersBufferSize = 8; 0112 QChar m_lastCharacters[lastCharactersBufferSize]; 0113 unsigned 
m_lastCharacterIndex; 0114 0115 bool m_closeTag; 0116 WTF::Vector<QChar, 8> m_tagName; 0117 WTF::Vector<QChar, 8> m_attributeName; 0118 WTF::Vector<QChar, 32> m_attributeValue; 0119 WTF::Vector<QChar, 8> m_lastStartTag; 0120 uint m_lastStartTagId; 0121 0122 DOM::DOMString m_urlToLoad; 0123 bool m_linkIsStyleSheet; 0124 0125 enum CSSState { 0126 CSSInitial, 0127 CSSMaybeComment, 0128 CSSComment, 0129 CSSMaybeCommentEnd, 0130 CSSRuleStart, 0131 CSSRule, 0132 CSSAfterRule, 0133 CSSRuleValue, 0134 CSSAferRuleValue 0135 }; 0136 CSSState m_cssState; 0137 WTF::Vector<QChar> m_cssRule; 0138 WTF::Vector<QChar> m_cssRuleValue; 0139 0140 int m_timeUsed; 0141 0142 DOM::DocumentImpl *m_document; 0143 }; 0144 0145 } 0146 0147 #endif