# File indexing completed on 2024-12-01 04:22:15
# -*- coding: utf-8 -*-
#     Copyright 2007-8 Jim Bublitz <jbublitz@nwinternet.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

# PLY lexer definition for SIP files (C++ declarations plus %directives).
#
# NOTE: in PLY the docstring of every ``t_*`` function IS its regular
# expression, so rule functions are documented with comments only.  Function
# rules are tried in definition order, before the string rules (which PLY
# sorts by decreasing pattern length) -- do not reorder the functions below.

import ply.lex as lex
import string

# Lexer states.  The 'exclusive' states only see their own rules plus the
# t_ANY_* rules; the 'inclusive' states add to the default rule set.
states = (
    ('enum', 'inclusive'),
    ('function', 'inclusive'),
    ('variable', 'inclusive'),
    ('block', 'exclusive'),
    ('sipStmt', 'exclusive'),
    ('filename', 'exclusive'),
    ('string', 'exclusive'),
    ('dottedname', 'exclusive'),
    ('keypairs', 'exclusive'),
    ('keypairs2', 'exclusive'),
)

# Names that open a "%<Name> ... %End" block (see t_ID / t_block_BLOCK_BODY).
blockTokens = (
    'AccessCode', 'BIGetCharBufferCode', 'BIGetReadBufferCode',
    'BIGetSegCountCode', 'BIGetWriteBufferCode', 'ConvertToSubClassCode',
    'ConvertToTypeCode', 'ConvertFromTypeCode', 'Copying', 'Doc',
    'ExportedDoc', 'ExportedHeaderCode', 'FinalisationCode', 'GCClearCode',
    'GCTraverseCode', 'GetCode', 'MethodCode', 'ModuleCode',
    'ModuleHeaderCode', 'PickleCode', 'PostInitialisationCode',
    'PreInitialisationCode', 'RaiseCode', 'SetCode', 'TypeCode',
    'TypeHeaderCode', 'UnitCode', 'VirtualCatcherCode', 'Makefile',
    'PrePythonCode', 'VirtualErrorHandler', 'BIGetBufferCode',
    'BIReleaseBufferCode', 'InitialisationCode', 'Docstring',
)

argumentAnnotations = (
    'AllowNone', 'Array', 'ArraySize', 'Constrained', 'In', 'Out',
    'TransferThis', 'GetWrapper',
)

classAnnotations = ('Abstract', 'DelayDtor', 'External', 'NoDefaultCtors')

functionAnnotations = (
    'Default', 'Factory', 'HoldGIL', 'NewThread', 'NoDerived', 'Numeric',
    'ReleaseGIL',
)

valueAnnotations = (
    'PyName', 'PostHook', 'PreHook', 'AutoGen', 'TypeFlags', 'Encoding',
)

multipleAnnotations = ('Transfer', 'TransferBack')

# Names that switch the lexer to the 'sipStmt' state when preceded by '%'.
stmtTokens = ('Exception', 'MappedType')

sipDirectives = (
    'CModule', 'CompositeModule', 'ConsolidatedModule', 'End', 'Feature',
    'If', 'Import', 'Include', 'License', 'Module', 'OptionalInclude',
    'Platforms', 'SIPOptions', 'Timeline', 'Plugin', 'DefaultEncoding',
    'DefaultMetatype', 'DefaultSupertype', 'API',
)

accessSpecifiers = ("private", "protected", "public", "slots", "signals")

edges = (
    "class", "struct", "union", "template", "enum", "namespace",
    "typedef", "operator",
)

storageQualifiers = ("auto", "register", "static", "extern", "mutable")

functionQualifiers = ("virtual", "explicit")

cvQualifiers = ("const", "volatile")

cppScalarTypes = (
    "int", "char", "float", "double", "long", "short", "unsigned",
    "signed", "bool", "void", "wchar_t",
)

# Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
operators = (
    'PLUS', 'MINUS', 'SLASH', 'PERCENT', 'VBAR', 'CARET',  # 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'BANG', 'LE', 'GE', 'EQ', 'NE',
    # Increment/decrement (++,--)
    'PLUSPLUS', 'MINUSMINUS',
    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL',  # 'RSHIFTEQUAL',
    'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
)

cppTokens = (edges + cppScalarTypes + functionQualifiers + operators
             + accessSpecifiers)

tokens = cppTokens + sipDirectives + (
    # Literals (identifier, integer constant, float constant, string
    # constant, char const)
    'ID', 'ICONST', 'HEXCONST', 'FCONST', 'SCONST', 'CCONST', 'CVQUAL',
    'STORAGE',  # 'PURESFX',

    # Expressions we don't parse
    # 'ENUMINIT',
    'ARRAYOP', 'FUNCPTR', 'BLOCK_BODY', 'BLOCK', 'SIPSTMT',
    'SIPSTMT_BODY', 'FILENAME', 'licenseAnnotation', 'IG', 'throw',
    'FORCE', 'END', 'STRING', 'DOTTEDNAME', 'LINECOMMENT', 'CCOMMENT',
    'BLANKLINE',

    # Structure dereference (->)
    'ARROW',

    # Treat separately from other operators
    'EQUALS', 'ASTERISK', 'AMPERSAND', 'TILDE', 'LT', 'GT',
    # Conditional operator (?)
    # 'CONDOP',

    # Delimiters ( ) [ ] { } , . ; : ::
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA',  # 'PERIOD',
    'SEMI', 'COLON', 'COLON2',

    # Ellipsis (...)
    'ELLIPSIS',
)

# Completely ignored characters (space, tab, form feed).  The 'block' state
# ignores nothing so that block bodies are captured verbatim.
t_ANY_ignore = ' \t\x0c'
t_block_ignore = ''

# Operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_ASTERISK = r'\*'
t_SLASH = r'/'
t_PERCENT = r'%'
t_VBAR = r'\|'
t_AMPERSAND = r'&'
t_TILDE = r'~'
t_CARET = r'\^'
# t_LSHIFT = r'<<'
# t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_BANG = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='

# Assignment operators

t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
# t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
# The caret must be escaped: a bare '^' is the start-of-string anchor, so
# r'^=' would only match a '=' at the very beginning of the input.
t_XOREQUAL = r'\^='

# Increment/decrement
t_PLUSPLUS = r'\+\+'
t_MINUSMINUS = r'--'

# ->
t_ARROW = r'->'

# ?
# t_CONDOP = r'\?'

# Delimiters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
# t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'
t_COLON2 = r'::'

# Hex literal.  ('|' inside a character class is a literal bar, so the
# class is spelled [xX], not [x|X].)
t_HEXCONST = r'0[xX][\da-fA-F]+'

# Octal literal
# t_OCTCONST = r'0[0-7]{3}?'

# Integer literal (a leading 0 followed by x/X is left for t_HEXCONST)
t_ICONST = r'(0(?![xX])|[1-9])\d*([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal.  The blanks around '|' are insignificant because PLY
# compiles rules with re.VERBOSE by default.
t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# Enumerator initializer
# t_enum_ENUMINIT = r'[^,}=]*(?=[,}])+'

# Array operator
t_ARRAYOP = r'[[][^(]*?[]]'

# Function pointer
t_FUNCPTR = r'\(\s*\*'

# String literal
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''

t_licenseAnnotation = r'(Licensee|Signature|Timestamp|Type)\s*=\s*"\w*?"(?=,|\/)'


# Special "//ig", "//force" and "//end" marker comments, recognised in every
# state and returned as their own token types.
def t_ANY_IG(t):
    r'\/\/ig.?'
    t.type = 'IG'
    return t


def t_ANY_FORCE(t):
    r'\/\/force'
    t.type = 'FORCE'
    return t


def t_ANY_END(t):
    r'\/\/end'
    t.type = 'END'
    return t


# Some things we ignore (entire line).  Rules that return nothing make PLY
# discard the matched text.
def t_friend_class(t):
    r'friend\s+class\s+[^;]*;?'
    t.lexer.lineno += t.value.count("\n")


def t_friend(t):
    r'friend\s'
    pass


def t_using(t):
    r'using\s+.*;?'
    t.lexer.lineno += t.value.count('\n')


def t_inline(t):
    r'inline\s+'
    pass


# Any sip block - %<blocktype> ... %End
# This has higher priority than t_ANY_NEWLINE because it is defined first.
def t_block_BLOCK_BODY(t):
    r'(.|\n)*?%End'
    t.lexer.lineno += t.value.count("\n")
    t.lexer.begin('variable')
    return t


# Newlines: always bump the line counter.  If everything between the
# previous newline (or the start of input) and this one is whitespace, emit
# a BLANKLINE token whose value is that whole line; otherwise the newline is
# discarded.
def t_ANY_NEWLINE(t):
    r'\n'
    t.lexer.lineno += 1
    data = t.lexer.lexdata
    pos = t.lexpos - 1
    while True:
        if pos < 0 or data[pos] == '\n':
            t.value = data[pos + 1:t.lexpos + 1]
            t.type = 'BLANKLINE'
            return t
        if data[pos] not in string.whitespace:
            # Real content on this line: not a blank line, drop the token.
            return None
        pos -= 1


# Body of a %Exception / %MappedType statement: "{ ... \n};"
def t_sipStmt_SIPSTMT_BODY(t):
    r'(?s){.*?\n};'
    t.lexer.lineno += t.value.count("\n")
    t.lexer.begin('variable')
    return t


def t_sipStmt_ID(t):
    r'[A-Za-z_][\w_]*'
    if t.value in cvQualifiers:
        t.type = "CVQUAL"
    return t


def t_sipStmt_String(t):
    r'"[^"]*"'
    t.type = "SCONST"
    return t


t_sipStmt_SLASH = r'\/'
t_sipStmt_LT = r'<'
t_sipStmt_GT = r'>'
t_sipStmt_COLON2 = r'::'
t_sipStmt_ASTERISK = r'\*'
t_sipStmt_COMMA = r','
t_sipStmt_EQUALS = r'='
t_sipStmt_COLON = r':'
t_sipStmt_MINUS = r'-'
t_sipStmt_ICONST = t_ICONST


# Argument of %Import / %OptionalInclude; returns to the 'variable' state.
def t_filename_FILENAME(t):
    r'[._A-Za-z][._/A-Za-z0-9\-]*[._A-Za-z0-9]'
    t.lexer.begin('variable')
    return t


# Argument of %Module / %API / %Include: either a bare name (handled like a
# filename) or a parenthesised "key=value, ..." list lexed in 'keypairs2'.
def t_keypairs_FILENAME(t):
    r'[._A-Za-z][._/A-Za-z0-9\-]*[._A-Za-z0-9]'
    t.lexer.begin('variable')
    return t


def t_keypairs_LPAREN(t):
    r'\('
    t.lexer.begin('keypairs2')
    return t


t_keypairs2_FILENAME = r'[._/A-Za-z0-9\-]+'
t_keypairs2_STRING = r'"[^"]*"'


def t_keypairs2_RPAREN(t):
    r'\)'
    t.lexer.begin('variable')
    return t


t_keypairs2_EQUALS = r'='
t_keypairs2_COMMA = r','


# Argument of %DefaultEncoding.
def t_string_STRING(t):
    r'"[^"]*"'
    t.lexer.begin('variable')
    return t


# Argument of %DefaultMetatype / %DefaultSupertype.
def t_dottedname_DOTTEDNAME(t):
    r'([A-Za-z_][A-Za-z0-9_]*\.?)+'
    t.lexer.begin('variable')
    return t


# SIP_SLOT_CON(...) / SIP_SLOT_DIS(...) are passed through whole as an ID.
def t_SIP_SLOT_CON(t):
    r'SIP_SLOT_CON\s*\(.*?\)'
    t.type = 'ID'
    return t


def t_SIP_SLOT_DIS(t):
    r'SIP_SLOT_DIS\s*\(.*?\)'
    t.type = 'ID'
    return t


def _followsPercent(t):
    """Return True when the character just before token *t* is '%'."""
    # PLY sets lexer.lexpos to the end of the match before calling the rule,
    # so this is the index of the character preceding the token.
    pos = t.lexer.lexpos - len(t.value) - 1
    # Guard pos < 0: a token at offset 0 must not wrap around to lexdata[-1].
    return pos >= 0 and t.lexer.lexdata[pos] == '%'


# Identifiers and keywords.  Keywords are re-typed from the tables above;
# SIP block/statement/directive names additionally switch the lexer state.
def t_ID(t):
    r'[A-Za-z_][\w_.]*'
    value = t.value
    if (value in edges or value in cppScalarTypes
            or value in accessSpecifiers):
        t.type = value
    elif value in storageQualifiers:
        t.type = "STORAGE"
    elif value in functionQualifiers:
        t.type = value
        if value == 'virtual' and stateInfo:
            stateInfo.virtual = True
    elif value in cvQualifiers:
        t.type = "CVQUAL"
    elif value in blockTokens:
        t.type = 'BLOCK'
        t.lexer.begin('block')
    elif value in stmtTokens:
        # Only "%Exception" / "%MappedType" are statements; a bare name
        # stays an ordinary ID.
        if _followsPercent(t):
            t.type = 'SIPSTMT'
            t.lexer.begin('sipStmt')
    elif value in sipDirectives:
        # Likewise a directive name only counts when written "%Name".
        if _followsPercent(t):
            if value in ['Import', 'OptionalInclude']:
                t.lexer.begin('filename')
            elif value in ['Module', 'API', 'Include']:
                t.lexer.begin('keypairs')
            elif value == 'DefaultEncoding':
                t.lexer.begin('string')
            elif value in ['DefaultMetatype', 'DefaultSupertype']:
                t.lexer.begin('dottedname')
            t.type = value
    elif value == 'throw':
        t.type = value
    return t


# Capture inline documentation ("/** ... */" and "/// ...") into stateInfo,
# when a target has been installed; the comment itself yields no token.
def t_ANY_DO2COMMENT(t):
    r'/\*\*(.|\n)*?\*/'
    if stateInfo:
        stateInfo.setDoc(t.value)
    t.lexer.lineno += t.value.count('\n')


def t_ANY_DO2CPPCOMMENT(t):
    r'///.*'
    if stateInfo:
        stateInfo.setDoc(t.value)


# Comments
def t_ANY_comment(t):
    r'/\*[^\*](.|\n)*?\*/'
    # Advance the lexer's counter (not the token's) so later tokens get the
    # right line number; the token keeps the line the comment started on.
    t.lexer.lineno += t.value.count('\n')
    t.type = 'CCOMMENT'
    return t


def t_ANY_cppcomment(t):
    r'//[^\n]*\n'
    t.lexer.lineno += t.value.count('\n')
    t.type = 'LINECOMMENT'
    return t


# Preprocessor directive (ignored)
def t_preprocessor(t):
    r'\#(.)*?\n'
    # The match consumes exactly one newline; count it on the lexer so the
    # line numbering of following tokens stays correct.
    t.lexer.lineno += 1


def t_ANY_error(t):
    print("Illegal character %s" % repr(t.value[0]))
    t.lexer.skip(1)


sipLexer = lex.lex()

# for collecting inline docs
stateInfo = None


def setStateInfoTarget(si):
    """Install *si* as the object the lexer reports to.

    The lexer calls ``si.setDoc(text)`` for documentation comments and sets
    ``si.virtual = True`` when it sees the ``virtual`` keyword.  Pass a
    falsy value (e.g. ``None``) to disable reporting.
    """
    global stateInfo
    stateInfo = si