kdev-css/parser/tokenizer.flex

0001 %{
0002 /*
0003  * This file is part of the DOM implementation for KDE.
0004  *
0005  * Copyright 2003 Lars Knoll (knoll\@kde.org)
0006  *
0007  * This library is free software; you can redistribute it and/or
0008  * modify it under the terms of the GNU Library General Public
0009  * License as published by the Free Software Foundation; either
0010  * version 2 of the License, or (at your option) any later version.
0011  *
0012  * This library is distributed in the hope that it will be useful,
0013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0015  * Library General Public License for more details.
0016  *
0017  * You should have received a copy of the GNU Library General Public License
0018  * along with this library; see the file COPYING.LIB.  If not, write to
0019  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0020  * Boston, MA 02110-1301, USA.
0021  */
0022
0023 #define DONT_INCLUDE_FLEXLEXER
0024 #include "tokenizer.h"
0025
0026 #include "cssparser.h"
0027
0028 %}
0029
/* Scanner configuration.
 *  noyywrap          - end of input ends the scan; no yywrap() is called.
 *  case-insensitive  - patterns match regardless of letter case.
 *  8bit              - the scanner accepts bytes in the range \200-\377.
 *  stack             - enables the start-condition stack functions.
 *  c++ / yyclass     - generate a C++ scanner whose yylex() is a member of
 *                      Css::Tokenizer.
 */
%option noyywrap
%option case-insensitive
%option 8bit
%option stack
%option c++
%option yyclass="Css::Tokenizer"

/* Inclusive start conditions: rules without a <condition> prefix stay active
 * in all of them. */
%s mediaquery at_rule block
0038
/* Whitespace and line breaks. */
nl              \n|\r\n|\r|\f
w               [ \t\r\n\f]*

/* Character classes and escapes. */
h               [0-9a-fA-F]
nonascii        [\200-\377]
unicode         \\{h}{1,6}[ \t\r\n\f]?
escape          {unicode}|\\[ -~\200-\377]

/* Identifiers and names. */
nmstart         [_a-zA-Z]|{nonascii}|{escape}
nmchar          [_a-zA-Z0-9-]|{nonascii}|{escape}
ident           -?{nmstart}{nmchar}*
name            {nmchar}+

/* Quoted strings: string1 is double-quoted, string2 is single-quoted. */
string1         \"([\t !#$%&(-~]|\\{nl}|\'|{nonascii}|{escape})*\"
string2         \'([\t !#$%&(-~]|\\{nl}|\"|{nonascii}|{escape})*\'
string          {string1}|{string2}

/* Numbers. */
intnum          [0-9]+
num             [0-9]+|[0-9]*"."[0-9]+

/* Colours, unquoted url() bodies, unicode ranges and an+b expressions. */
hexcolor        {h}{3}|{h}{6}
url             ([!#$%&*-~]|{nonascii}|{escape})*
range           \?{1,6}|{h}(\?{0,5}|{h}(\?{0,4}|{h}(\?{0,3}|{h}(\?{0,2}|{h}(\??|{h})))))
nth             (-?[0-9]*n[\+-][0-9]+)|(-?[0-9]*n)
0059
%%

 /* CSS comments are consumed without producing a token. */
\/\*[^*]*\*+([^/*][^*]*\*+)*\/  /* ignore comments */

[ \t\r\n\f]+            {return Parser::Token_WHITESPACE;}

 /* Both SGML comment delimiters are reported as the same token. */
"<!--"                  {return Parser::Token_SGML_CD;}
"-->"                   {return Parser::Token_SGML_CD;}
"~="                    {return Parser::Token_INCLUDES;}
"|="                    {return Parser::Token_DASHMATCH;}
"^="                    {return Parser::Token_BEGINSWITH;}
"$="                    {return Parser::Token_ENDSWITH;}
"*="                    {return Parser::Token_CONTAINS;}
 /* Media query keywords; listed before {ident} so they win on equal length. */
<mediaquery>"not"       {return Parser::Token_MEDIA_NOT;}
<mediaquery>"only"      {return Parser::Token_MEDIA_ONLY;}
<mediaquery>"and"       {return Parser::Token_MEDIA_AND;}

{string}                {return Parser::Token_STRING;}
{ident}                 {return Parser::Token_IDENT;}
{nth}                   {return Parser::Token_NTH;}


 /* Inside a declaration block "#abc" is a colour; elsewhere it is an id selector. */
<block>"#"{hexcolor}           {return Parser::Token_HEXCOLOR;}
"#"{ident}              {return Parser::Token_IDSEL;}
 /* @rule tokens surrounding css declaration blocks with { } braces must start a BEGIN(at_rule) context */
"@import"               {BEGIN(mediaquery); return Parser::Token_IMPORT_SYM;}
"@page"                 {BEGIN(at_rule); return Parser::Token_PAGE_SYM;}
"@media"                {BEGIN(mediaquery); return Parser::Token_MEDIA_SYM;}
"@font-face"            {BEGIN(at_rule); return Parser::Token_FONT_FACE_SYM;}
"@charset"              {BEGIN(at_rule); return Parser::Token_CHARSET_SYM;}
"@namespace"            {BEGIN(at_rule); return Parser::Token_NAMESPACE_SYM; }

"!"{w}"important"         {return Parser::Token_IMPORTANT_SYM;}

 /* Numbers with a known unit; {num}{ident} below catches every other unit. */
{num}em                 {return Parser::Token_EMS;}
{num}__qem              {return Parser::Token_QEMS;} /* quirky ems */
{num}ex                 {return Parser::Token_EXS;}
{num}px                 {return Parser::Token_PXS;}
{num}cm                 {return Parser::Token_CMS;}
{num}mm                 {return Parser::Token_MMS;}
{num}in                 {return Parser::Token_INS;}
{num}pt                 {return Parser::Token_PTS;}
{num}pc                 {return Parser::Token_PCS;}
{num}deg                {return Parser::Token_DEGS;}
{num}rad                {return Parser::Token_RADS;}
{num}grad               {return Parser::Token_GRADS;}
{num}ms                 {return Parser::Token_MSECS;}
{num}s                  {return Parser::Token_SECS;}
{num}Hz                 {return Parser::Token_HERZ;}
<mediaquery>{num}dpi    {return Parser::Token_DPI;}
<mediaquery>{num}dpcm   {return Parser::Token_DPCM;}
{num}kHz                {return Parser::Token_KHERZ;}
{num}{ident}            {return Parser::Token_DIMEN;}
{num}%                  {return Parser::Token_PERCENTAGE;}
{intnum}                {return Parser::Token_INTEGER;}
{num}                   {return Parser::Token_FLOAT;}


 /* "not(" must stay ahead of the generic function rule to win on equal length. */
"not("                  {return Parser::Token_NOTFUNCTION;}
"url("{w}{string}{w}")" {return Parser::Token_URI;}
"url("{w}{url}{w}")"    {return Parser::Token_URI;}
{ident}"("              {return Parser::Token_FUNCTION;}

U\+{range}              {return Parser::Token_UNICODERANGE;}
U\+{h}{1,6}-{h}{1,6}    {return Parser::Token_UNICODERANGE;}

 /* Start-condition transitions. */
 /* "{" after a selector or an @page/@font-face style rule opens a declaration block. */
 /* "}" closing a declaration block and ";" ending an at-rule return to INITIAL. */
 /* "{" or ";" ending a media query returns to INITIAL, where nested rule sets are lexed. */
<INITIAL>"{"            {BEGIN(block); return Parser::Token_LBRACE;}
<at_rule>"{"            {BEGIN(block); return Parser::Token_LBRACE;}
<at_rule>";"            {BEGIN(INITIAL); return Parser::Token_SEMICOLON;}
<block>"}"              {BEGIN(INITIAL); return Parser::Token_RBRACE;}
<mediaquery>"{"         {BEGIN(INITIAL); return Parser::Token_LBRACE;}
<mediaquery>";"         {BEGIN(INITIAL); return Parser::Token_SEMICOLON;}

 /* Plain punctuation must not change the start condition: forcing `block` here */
 /* would drop the mediaquery/at_rule context at the first "(", ":" or "," and */
 /* would make "#abc" in a selector list lex as a colour. */
,                       {return Parser::Token_COMMA;}
\(                      {return Parser::Token_LPAREN;}
\)                      {return Parser::Token_RPAREN;}
\{                      {return Parser::Token_LBRACE;}
\}                      {return Parser::Token_RBRACE;}
\[                      {return Parser::Token_LBRACKET;}
\]                      {return Parser::Token_RBRACKET;}
:                       {return Parser::Token_COLON;}
\*                      {return Parser::Token_STAR;}
;                       {return Parser::Token_SEMICOLON;}
\.                      {return Parser::Token_DOT;}
=                       {return Parser::Token_EQUALS;}
\+                      {return Parser::Token_PLUS;}
\-                      {return Parser::Token_MINUS;}
\/                      {return Parser::Token_DIVIDE;}

 /* add all tokens that match here above */
 /* Catch-all: any other single character is reported as invalid, state unchanged. */
.                       {return Parser::Token_INVALID;}
%%
0152
0153
0154 namespace Css
0155 {
0156
0157 Tokenizer::Tokenizer( KDevPG::TokenStream *tokenStream, const QByteArray &contents )
0158 {
0159     restart( tokenStream, contents );
0160 }
0161
0162 Tokenizer::Tokenizer( KDevPG::TokenStream *tokenStream, const char* contents )
0163 {
0164     restart( tokenStream, contents );
0165 }
0166
0167 Tokenizer::Tokenizer( KDevPG::TokenStream *tokenStream, const QString &contents )
0168 {
0169     restart( tokenStream, contents.toLatin1() );
0170 }
0171
0172 void Tokenizer::restart( KDevPG::TokenStream *tokenStream, const QByteArray &contents )
0173 {
0174     m_locationTable = tokenStream->locationTable();
0175     m_contents = contents;
0176     m_tokenBegin = 0;
0177     m_tokenEnd = -1;
0178     m_currentOffset = 0;
0179
0180     YY_NEW_FILE;
0181     BEGIN(INITIAL); // is not set automatically by yyrestart()
0182 }
0183
0184 // reads a character, and returns 1 as the number of characters read
0185 // (or 0 when the end of the string is reached)
0186 int Tokenizer::LexerInput( char *buf, int /*max_size*/ )
0187 {
0188     if (m_currentOffset >= m_contents.length()) return 0;
0189
0190     char c = m_contents.at(m_currentOffset++);
0191
0192     switch(c)
0193     {
0194     case '\r':
0195         c = '\n'; // only have one single line break character: '\n'
0196         if ( m_currentOffset + 1 < m_contents.size() && m_contents.at(m_currentOffset + 1) == '\n' )
0197         {
0198             m_currentOffset++;
0199             m_tokenEnd++;
0200         }
0201
0202         // fall through
0203     case '\n':
0204         m_locationTable->newline( m_currentOffset - 1 );
0205         break;
0206
0207     default:
0208         break;
0209     }
0210
0211     buf[0] = c;
0212     return 1;
0213 }
0214
0215 } // end of namespace Css
0216