File indexing completed on 2024-05-05 16:10:17

0001 /*
0002  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
0003  *
0004  * Redistribution and use in source and binary forms, with or without
0005  * modification, are permitted provided that the following conditions
0006  * are met:
0007  * 1. Redistributions of source code must retain the above copyright
0008  *    notice, this list of conditions and the following disclaimer.
0009  * 2. Redistributions in binary form must reproduce the above copyright
0010  *    notice, this list of conditions and the following disclaimer in the
0011  *    documentation and/or other materials provided with the distribution.
0012  *
0013  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
0014  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
0015  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
0016  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
0017  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
0018  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
0019  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
0020  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
0021  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0022  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
0023  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0024  */
0025 
0026 #ifndef HTMLPROSPECTIVETOKENIZER_H
0027 #define HTMLPROSPECTIVETOKENIZER_H
0028 
0029 #include "misc/stringit.h"
0030 #include <wtf/Vector.h>
0031 
// Forward declaration only. Within this header DOM::DocumentImpl is used
// solely through pointers (the ProspectiveTokenizer constructor parameter
// and the m_document member), so the full definition is not needed here.
0032 namespace DOM
0033 {
0034 class DocumentImpl;
0035 }
0036 
0037 namespace khtml
0038 {
0039 
// Forward declarations in namespace khtml. Neither type is referenced by the
// declarations visible in this header.
// NOTE(review): presumably needed by the implementation file or by includers;
// confirm before removing.
0040 class CachedObject;
0041 class CachedObjectClient;
0042 
// Tokenizer that is fed through begin() / write() / end() and keeps all of its
// parsing state in the private members below. Apart from inProgress(), the
// member functions are only declared here; their definitions are not part of
// this header.
// NOTE(review): m_urlToLoad and m_linkIsStyleSheet suggest the class scans
// ahead for resources to load early; confirm against the implementation.
// NOTE(review): a destructor is declared and a raw DocumentImpl pointer is
// held, but no copy constructor or copy assignment is declared, so the
// implicitly generated ones apply. Confirm whether copying is intended.
0043 class ProspectiveTokenizer
0044 {
0045 public:
         // Takes the document this tokenizer works for (presumably stored in
         // m_document; the definition is not shown here).
         // NOTE(review): not marked explicit, so a DOM::DocumentImpl* converts
         // implicitly to a ProspectiveTokenizer.
0046     ProspectiveTokenizer(DOM::DocumentImpl *);
0047     ~ProspectiveTokenizer();
         // Input interface: write() receives a chunk of source text.
         // NOTE(review): expected call order is presumably begin(), any number
         // of write() calls, then end(); confirm in the implementation.
0048     void begin();
0049     void write(const khtml::TokenizerString &);
0050     void end();
         // Returns the current value of m_inProgress.
0051     bool inProgress() const
0052     {
0053         return m_inProgress;
0054     }
0055 
         // Static helper, callable without a ProspectiveTokenizer instance.
         // Takes the source by non-const reference and reports through the
         // notEnoughCharacters out-parameter.
         // NOTE(review): the unsigned result is presumably the decoded
         // character code and the source is presumably advanced; confirm.
0056     static unsigned consumeEntity(khtml::TokenizerString &, bool &notEnoughCharacters);
0057 
0058 private:
         // Internal tokenizing entry point and state reset (definitions not shown).
0059     void tokenize(const khtml::TokenizerString &);
0060     void reset();
0061 
         // Output hooks for a completed tag and for a single character.
0062     void emitTag();
0063     void emitCharacter(QChar);
0064 
         // Per-character CSS scanning and rule output; see CSSState below.
0065     void tokenizeCSS(QChar);
0066     void emitCSSRule();
0067 
         // NOTE(review): presumably consumes m_attributeName / m_attributeValue; confirm.
0068     void processAttribute();
0069 
         // Helpers operating on the m_lastCharacters buffer declared below.
0070     void clearLastCharacters();
0071     void rememberCharacter(QChar);
0072     bool lastCharactersMatch(const char *, unsigned count) const;
0073 
         // Value reported by inProgress().
0074     bool m_inProgress;
0075     khtml::TokenizerString m_source;
0076 
         // Markup tokenizer states.
         // NOTE(review): the names appear to follow the HTML5 tokenization
         // states; confirm against the implementation.
0077     enum State {
0078         Data,
0079         EntityData,
0080         TagOpen,
0081         CloseTagOpen,
0082         TagName,
0083         BeforeAttributeName,
0084         AttributeName,
0085         AfterAttributeName,
0086         BeforeAttributeValue,
0087         AttributeValueDoubleQuoted,
0088         AttributeValueSingleQuoted,
0089         AttributeValueUnquoted,
0090         EntityInAttributeValue,
0091         BogusComment,
0092         MarkupDeclarationOpen,
0093         CommentStart,
0094         CommentStartDash,
0095         Comment,
0096         CommentEndDash,
0097         CommentEnd
0098     };
0099     State m_state;
         // Meaning not evident from this header; see the implementation.
0100     bool m_escape;
         // Content model values; the current one is held in m_contentModel.
0101     enum ContentModel {
0102         PCDATA,
0103         RCDATA,
0104         CDATA,
0105         PLAINTEXT
0106     };
0107     ContentModel m_contentModel;
0108     unsigned m_commentPos;
         // NOTE(review): presumably the state to resume once
         // EntityInAttributeValue has been handled; confirm.
0109     State m_stateBeforeEntityInAttributeValue;
0110 
         // m_lastCharacters is a fixed array of lastCharactersBufferSize (8) QChars.
         // NOTE(review): m_lastCharacterIndex is presumably the write position
         // used by rememberCharacter() / lastCharactersMatch(); confirm.
0111     static const unsigned lastCharactersBufferSize = 8;
0112     QChar m_lastCharacters[lastCharactersBufferSize];
0113     unsigned m_lastCharacterIndex;
0114 
         // Buffers for the tag currently being read and for the last start tag.
         // NOTE(review): the numeric template argument is presumably
         // WTF::Vector's inline capacity; confirm against wtf/Vector.h.
0115     bool m_closeTag;
0116     WTF::Vector<QChar, 8> m_tagName;
0117     WTF::Vector<QChar, 8> m_attributeName;
0118     WTF::Vector<QChar, 32> m_attributeValue;
0119     WTF::Vector<QChar, 8> m_lastStartTag;
         // NOTE(review): uint is not a standard C++ type; presumably a typedef
         // reaching this header through its includes. Confirm.
0120     uint m_lastStartTagId;
0121 
         // NOTE(review): DOM::DOMString is not declared in this header itself;
         // presumably provided via "misc/stringit.h". Confirm.
0122     DOM::DOMString m_urlToLoad;
0123     bool m_linkIsStyleSheet;
0124 
         // States for the CSS scanner (tokenizeCSS / emitCSSRule).
         // NOTE(review): CSSAferRuleValue is spelled "Afer" (sic), presumably
         // meant as "After". Renaming it would require updating every user of
         // the enumerator outside this header.
0125     enum CSSState {
0126         CSSInitial,
0127         CSSMaybeComment,
0128         CSSComment,
0129         CSSMaybeCommentEnd,
0130         CSSRuleStart,
0131         CSSRule,
0132         CSSAfterRule,
0133         CSSRuleValue,
0134         CSSAferRuleValue
0135     };
0136     CSSState m_cssState;
0137     WTF::Vector<QChar> m_cssRule;
0138     WTF::Vector<QChar> m_cssRuleValue;
0139 
         // NOTE(review): the unit and meaning are not stated in this header. TODO confirm.
0140     int m_timeUsed;
0141 
         // Raw pointer; ownership and lifetime are not expressed in this header.
0142     DOM::DocumentImpl *m_document;
0143 };
0144 
0145 }
0146 
0147 #endif