File indexing completed on 2024-12-08 10:18:18

0001 # -*- coding: utf-8 -*-
0002 #     Copyright 2007-8 Jim Bublitz <jbublitz@nwinternet.com>
0003 #
0004 # This program is free software; you can redistribute it and/or modify
0005 # it under the terms of the GNU General Public License as published by
0006 # the Free Software Foundation; either version 2 of the License, or
0007 # (at your option) any later version.
0008 #
0009 # This program is distributed in the hope that it will be useful,
0010 # but WITHOUT ANY WARRANTY; without even the implied warranty of
0011 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0012 # GNU General Public License for more details.
0013 #
0014 # You should have received a copy of the GNU General Public License
0015 # along with this program; if not, write to the
0016 # Free Software Foundation, Inc.,
0017 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
0018 
0019 import ply.lex as lex
0020 import string
0021 
# Lexer states handed to PLY as (name, kind) pairs.  'inclusive' states add
# their rules to the default rule set; 'exclusive' states only use rules
# declared for that state (plus the t_ANY_* rules).
states = (
    ('enum', 'inclusive'),
    ('function', 'inclusive'),
    ('variable', 'inclusive'),
    ('block', 'exclusive'),
    ('sipStmt', 'exclusive'),
    ('filename', 'exclusive'),
    ('string', 'exclusive'),
    ('dottedname', 'exclusive'),
    ('keypairs', 'exclusive'),
    ('keypairs2', 'exclusive'),
)
0026 
# Names that t_ID reports as BLOCK tokens; seeing one switches the lexer to
# the 'block' state, whose body rule runs up to the closing %End.
# Each name appears exactly once (the original table repeated four of the
# BIGet*Code names); first-occurrence order is preserved.
blockTokens = (
    'AccessCode',
    'BIGetCharBufferCode',
    'BIGetReadBufferCode',
    'BIGetSegCountCode',
    'BIGetWriteBufferCode',
    'ConvertToSubClassCode',
    'ConvertToTypeCode',
    'ConvertFromTypeCode',
    'Copying',
    'Doc',
    'ExportedDoc',
    'ExportedHeaderCode',
    'FinalisationCode',
    'GCClearCode',
    'GCTraverseCode',
    'GetCode',
    'MethodCode',
    'ModuleCode',
    'ModuleHeaderCode',
    'PickleCode',
    'PostInitialisationCode',
    'PreInitialisationCode',
    'RaiseCode',
    'SetCode',
    'TypeCode',
    'TypeHeaderCode',
    'UnitCode',
    'VirtualCatcherCode',
    'Makefile',
    'PrePythonCode',
    'VirtualErrorHandler',
    'BIGetBufferCode',
    'BIReleaseBufferCode',
    'InitialisationCode',
    'Docstring',
)
0036 
# SIP annotation name tables.  None of them is referenced by the token rules
# in this file; presumably they are imported by the parser -- confirm against
# the callers before changing them.
argumentAnnotations = (
    'AllowNone',
    'Array',
    'ArraySize',
    'Constrained',
    'In',
    'Out',
    'TransferThis',
    'GetWrapper',
)

classAnnotations = (
    'Abstract',
    'DelayDtor',
    'External',
    'NoDefaultCtors',
)

functionAnnotations = (
    'Default',
    'Factory',
    'HoldGIL',
    'NewThread',
    'NoDerived',
    'Numeric',
    'ReleaseGIL',
)

valueAnnotations = (
    'PyName',
    'PostHook',
    'PreHook',
    'AutoGen',
    'TypeFlags',
    'Encoding',
)

multipleAnnotations = (
    'Transfer',
    'TransferBack',
)
0049                
# Statement keywords: when written as %Name, t_ID reports SIPSTMT and enters
# the 'sipStmt' state.
stmtTokens = (
    'Exception',
    'MappedType',
)

# Directive keywords: when written as %Name, t_ID uses the name itself as the
# token type (and some of them switch the lexer state).
sipDirectives = (
    'CModule',
    'CompositeModule',
    'ConsolidatedModule',
    'End',
    'Feature',
    'If',
    'Import',
    'Include',
    'License',
    'Module',
    'OptionalInclude',
    'Platforms',
    'SIPOptions',
    'Timeline',
    'Plugin',
    'DefaultEncoding',
    'DefaultMetatype',
    'DefaultSupertype',
    'API',
)
0056 
# C++ keyword tables consulted by t_ID to classify identifiers.

# Reported with the keyword itself as the token type.
accessSpecifiers = ('private', 'protected', 'public', 'slots', 'signals')

# Reported with the keyword itself as the token type.
edges = (
    'class',
    'struct',
    'union',
    'template',
    'enum',
    'namespace',
    'typedef',
    'operator',
)

# All reported as a single STORAGE token type.
storageQualifiers = ('auto', 'register', 'static', 'extern', 'mutable')

# Reported with the keyword itself as the token type.
functionQualifiers = ('virtual', 'explicit')

# All reported as a single CVQUAL token type.
cvQualifiers = ('const', 'volatile')

# Reported with the keyword itself as the token type.
cppScalarTypes = (
    'int',
    'char',
    'float',
    'double',
    'long',
    'short',
    'unsigned',
    'signed',
    'bool',
    'void',
    'wchar_t',
)
0070 
# Operator token names.  ASTERISK, AMPERSAND, TILDE, LT, GT and EQUALS are
# declared separately in `tokens`; LSHIFT, RSHIFT and RSHIFTEQUAL are disabled.
operators = (
    # Arithmetic / bitwise: + - / % | ^
    'PLUS',
    'MINUS',
    'SLASH',
    'PERCENT',
    'VBAR',
    'CARET',
    # Logical and comparison: || && ! <= >= == !=
    'LOR',
    'LAND',
    'BANG',
    'LE',
    'GE',
    'EQ',
    'NE',
    # Increment / decrement: ++ --
    'PLUSPLUS',
    'MINUSMINUS',
    # Compound assignment: *= /= %= += -= <<= &= ^= |=
    'TIMESEQUAL',
    'DIVEQUAL',
    'MODEQUAL',
    'PLUSEQUAL',
    'MINUSEQUAL',
    'LSHIFTEQUAL',
    'ANDEQUAL',
    'XOREQUAL',
    'OREQUAL',
)
0081 
# Token names that come from the C++ side: structural keywords, scalar types,
# function qualifiers, operators and access specifiers.
cppTokens = (
    edges
    + cppScalarTypes
    + functionQualifiers
    + operators
    + accessSpecifiers
)

# Full token list for the lexer: C++ tokens, SIP directive names and the
# remaining token types produced by the rules below.
tokens = cppTokens + sipDirectives + (
    # Literals (identifier, integer, hex, float, string and char constants)
    # and qualifier classes.  PURESFX is disabled.
    'ID',
    'ICONST',
    'HEXCONST',
    'FCONST',
    'SCONST',
    'CCONST',
    'CVQUAL',
    'STORAGE',

    # Expressions we don't parse, SIP blocks/statements, marker comments and
    # comment/blank-line tokens.  ENUMINIT is disabled.
    'ARRAYOP',
    'FUNCPTR',
    'BLOCK_BODY',
    'BLOCK',
    'SIPSTMT',
    'SIPSTMT_BODY',
    'FILENAME',
    'licenseAnnotation',
    'IG',
    'throw',
    'FORCE',
    'END',
    'STRING',
    'DOTTEDNAME',
    'LINECOMMENT',
    'CCOMMENT',
    'BLANKLINE',

    # Structure dereference (->)
    'ARROW',

    # Treated separately from the other operators.  CONDOP (?) is disabled.
    'EQUALS',
    'ASTERISK',
    'AMPERSAND',
    'TILDE',
    'LT',
    'GT',

    # Delimiters ( ) [ ] { } , ; : ::   (PERIOD is disabled)
    'LPAREN',
    'RPAREN',
    'LBRACKET',
    'RBRACKET',
    'LBRACE',
    'RBRACE',
    'COMMA',
    'SEMI',
    'COLON',
    'COLON2',

    # Ellipsis (...)
    'ELLIPSIS',
)
0113 
# Completely ignored characters (space, tab, form feed) in every state.
t_ANY_ignore           = ' \t\x0c'
# The 'block' state gets an explicit empty ignore set.
# NOTE(review): presumably this overrides t_ANY_ignore so block bodies keep
# their whitespace -- confirm against PLY's handling of per-state ignore.
t_block_ignore         = ''

# Operators
t_PLUS             = r'\+'
t_MINUS            = r'-'
t_ASTERISK         = r'\*'
t_SLASH            = r'/'
t_PERCENT          = r'%'
t_VBAR             = r'\|'
t_AMPERSAND        = r'&'
t_TILDE            = r'~'
t_CARET            = r'\^'
#t_LSHIFT           = r'<<'
#t_RSHIFT           = r'>>'
t_LOR              = r'\|\|'
t_LAND             = r'&&'
t_BANG             = r'!'
t_LT               = r'<'
t_GT               = r'>'
t_LE               = r'<='
t_GE               = r'>='
t_EQ               = r'=='
t_NE               = r'!='

# Assignment operators

t_EQUALS           = r'='
t_TIMESEQUAL       = r'\*='
t_DIVEQUAL         = r'/='
t_MODEQUAL         = r'%='
t_PLUSEQUAL        = r'\+='
t_MINUSEQUAL       = r'-='
t_LSHIFTEQUAL      = r'<<='
#t_RSHIFTEQUAL      = r'>>='
t_ANDEQUAL         = r'&='
t_OREQUAL          = r'\|='
# The caret must be escaped: an unescaped '^' is a start-of-string anchor, so
# the rule could never match the two characters "^=".
t_XOREQUAL         = r'\^='

# Increment/decrement
t_PLUSPLUS         = r'\+\+'
t_MINUSMINUS       = r'--'

# ->
t_ARROW            = r'->'

# ?
#t_CONDOP           = r'\?'

# Delimiters
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
t_LBRACKET         = r'\['
t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_COMMA            = r','
#t_PERIOD           = r'\.'
t_SEMI             = r';'
t_COLON            = r':'
t_ELLIPSIS         = r'\.\.\.'
t_COLON2           = r'::'
0177 
# Hex literal.  The class is [xX]; the former [x|X] also accepted a literal
# '|', so input such as "0|1" was lexed as one hex constant.
t_HEXCONST = r'0[xX][\da-fA-F]+'

# Octal Literal
#t_OCTCONST = r'0[0-7]{3}?'

# Integer literal.  A leading 0 must not start a hex literal.  The two-letter
# suffixes are tried before the one-letter ones, otherwise "42UL" would stop
# after "42U" (regex alternation takes the first alternative that matches).
t_ICONST = r'(0(?![xX])|[1-9])\d*([uU][lL]|[lL][uU]|[uU]|[lL])?'

# Floating literal.  The spaces around '|' rely on the pattern being compiled
# in verbose mode, which is PLY's default.
t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# Enumerator initializer
#t_enum_ENUMINIT = r'[^,}=]*(?=[,}])+'

# Array operator: a bracketed span containing no '('
t_ARRAYOP = r'[[][^(]*?[]]'

# Function pointer: '(' followed by '*'
t_FUNCPTR = r'\(\s*\*'

# String literal
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''

# License annotation: key = "value", only when followed by ',' or '/'
t_licenseAnnotation = r'(Licensee|Signature|Timestamp|Type)\s*=\s*"\w*?"(?=,|\/)'
# Marker comment "//ig" (plus at most one following character), recognised in
# every lexer state and returned as an IG token.  The pattern is a raw string
# so that "\/" is not an invalid string escape; its value is unchanged.
def t_ANY_IG(t):
    r'\/\/ig.?'
    t.type = 'IG'
    return t
0211     
# Marker comment "//force", recognised in every lexer state and returned as a
# FORCE token.  The pattern is a raw string so that "\/" is not an invalid
# string escape; its value is unchanged.
def t_ANY_FORCE(t):
    r'\/\/force'
    t.type = 'FORCE'
    return t
0216 
# Marker comment "//end", recognised in every lexer state and returned as an
# END token.  The pattern is a raw string so that "\/" is not an invalid
# string escape; its value is unchanged.
def t_ANY_END(t):
    r'\/\/end'
    t.type = 'END'
    return t
0221 
# Some things we ignore entirely: a "friend class ...;" declaration is
# consumed and dropped (no token is returned); only the line count is kept
# in step with any newlines it spans.
def t_friend_class(t):
    r'friend\s+class\s+[^;]*;?'
    newlines = t.value.count("\n")
    t.lexer.lineno = t.lexer.lineno + newlines
    return None
0226 
# Drops the keyword "friend" together with one following whitespace
# character; no token is returned for it.
def t_friend(t):
    r'friend\s'
    return None
0230 
# Drops a "using ..." declaration (no token is returned) while keeping the
# lexer's line count in step with any newlines inside the matched text.
def t_using(t):
    r'using\s+.*;?'
    skipped = t.value
    t.lexer.lineno = t.lexer.lineno + skipped.count('\n')
    return None
0234 
# Drops the keyword "inline" and the whitespace after it; no token is
# returned for it.
def t_inline(t):
    r'inline\s+'
    return None
0238 
# Body of any SIP block: %<blocktype> ... %End.  Everything up to and
# including the first "%End" is returned as one BLOCK_BODY token, after which
# the lexer switches to the 'variable' state.
# Defined before t_ANY_NEWLINE so that it has higher priority than that rule.
def t_block_BLOCK_BODY(t):
    r'(.|\n)*?%End'
    lexer = t.lexer
    lexer.lineno += t.value.count("\n")
    lexer.begin('variable')
    return t
0246 
# Newline handling for every state: always advances the line counter.  When
# the line that just ended held nothing but whitespace, the whole line
# (including the newline) is returned as a BLANKLINE token; otherwise the
# newline is dropped.
def t_ANY_NEWLINE(t):
    r'\n'
    lexer = t.lexer
    lexer.lineno += 1
    text = lexer.lexdata
    # Walk back from the newline to the previous newline or start of input.
    idx = t.lexpos - 1
    while idx >= 0 and text[idx] != '\n':
        if text[idx] not in string.whitespace:
            # The line has real content, so it is not a blank line.
            return None
        idx -= 1
    t.value = text[idx + 1:t.lexpos + 1]
    t.type = 'BLANKLINE'
    return t
0259         
# Body of a SIP statement: from '{' through the first line that consists of
# "};".  Returned as one SIPSTMT_BODY token, after which the lexer switches
# to the 'variable' state.
# The pattern spells "any character including newline" as (.|\n), like
# t_block_BLOCK_BODY, instead of using the inline global flag (?s): once the
# rule is embedded in a larger expression that flag is no longer at the start
# of the pattern, which Python 3.11+ rejects.
def t_sipStmt_SIPSTMT_BODY(t):
    r'{(.|\n)*?\n};'
    t.lexer.lineno += t.value.count("\n")
    t.lexer.begin('variable')
    return t
0265 
# Identifier inside a SIP statement: returned as ID, except that the names
# listed in cvQualifiers are retyped as CVQUAL.
def t_sipStmt_ID(t):
    r'[A-Za-z_][\w_]*'
    is_cv_qualifier = t.value in cvQualifiers
    if is_cv_qualifier:
        t.type = "CVQUAL"
    return t
0271 
# Double-quoted string inside a SIP statement (no escape handling); returned
# with token type SCONST.
def t_sipStmt_String(t):
    r'"[^"]*"'
    setattr(t, 'type', "SCONST")
    return t
0276 
# Punctuation and integer rules for the 'sipStmt' state.  That state is
# declared 'exclusive' in `states`, so the rules it needs are declared again
# here with the t_sipStmt_ prefix.
t_sipStmt_SLASH    = r'\/'
t_sipStmt_LT       = r'<'
t_sipStmt_GT       = r'>'
t_sipStmt_COLON2   = r'::'
t_sipStmt_ASTERISK = r'\*'
t_sipStmt_COMMA    = r','
t_sipStmt_EQUALS   = r'='
t_sipStmt_COLON    = r':'
t_sipStmt_MINUS    = r'-'
# Same pattern as the default-state integer literal rule.
t_sipStmt_ICONST   = t_ICONST
0287 
# File name in the 'filename' state (entered by t_ID after %Import and
# %OptionalInclude).  Returns a FILENAME token and switches the lexer to the
# 'variable' state.
def t_filename_FILENAME(t):
    r'[._A-Za-z][._/A-Za-z0-9\-]*[._A-Za-z0-9]'
    lexer = t.lexer
    lexer.begin('variable')
    return t
0292 
# Plain name/file argument in the 'keypairs' state (entered by t_ID after
# %Module, %API and %Include).  Returns a FILENAME token and switches the
# lexer to the 'variable' state.
def t_keypairs_FILENAME(t):
    r'[._A-Za-z][._/A-Za-z0-9\-]*[._A-Za-z0-9]'
    lexer = t.lexer
    lexer.begin('variable')
    return t
0297     
# Opening parenthesis in the 'keypairs' state: returns LPAREN and moves the
# lexer to the 'keypairs2' state, whose rules handle the parenthesised list.
def t_keypairs_LPAREN(t):
    r'\('
    lexer = t.lexer
    lexer.begin('keypairs2')
    return t
0302     
# Rules for the exclusive 'keypairs2' state, i.e. the text between the
# parentheses that t_keypairs_LPAREN opened: names/paths, quoted strings,
# '=' and ','.
t_keypairs2_FILENAME = r'[._/A-Za-z0-9\-]+'
t_keypairs2_STRING = r'"[^"]*"'


# Closing parenthesis: returns RPAREN and switches the lexer to the
# 'variable' state.
def t_keypairs2_RPAREN(t):
    r'\)'
    lexer = t.lexer
    lexer.begin('variable')
    return t


t_keypairs2_EQUALS = r'='
t_keypairs2_COMMA = r','
0312     
# Double-quoted string in the 'string' state (entered by t_ID after
# %DefaultEncoding).  Returns a STRING token and switches the lexer to the
# 'variable' state.
def t_string_STRING(t):
    r'"[^"]*"'
    lexer = t.lexer
    lexer.begin('variable')
    return t
0317 
# Dotted identifier in the 'dottedname' state (entered by t_ID after
# %DefaultMetatype and %DefaultSupertype).  Returns a DOTTEDNAME token and
# switches the lexer to the 'variable' state.
def t_dottedname_DOTTEDNAME(t):
    r'([A-Za-z_][A-Za-z0-9_]*\.?)+'
    lexer = t.lexer
    lexer.begin('variable')
    return t
0322 
# "SIP_SLOT_CON (...)" up to the first ')' is returned whole as a single ID
# token.
def t_SIP_SLOT_CON(t):
    r'SIP_SLOT_CON\s*\(.*?\)'
    setattr(t, 'type', 'ID')
    return t
0327 
# "SIP_SLOT_DIS (...)" up to the first ')' is returned whole as a single ID
# token.
def t_SIP_SLOT_DIS(t):
    r'SIP_SLOT_DIS\s*\(.*?\)'
    setattr(t, 'type', 'ID')
    return t
0332 
# Identifier rule for the default state.  The matched word is classified
# against the keyword tables, in this order:
#   - edges / cppScalarTypes / accessSpecifiers / functionQualifiers / 'throw':
#     the word itself becomes the token type ('virtual' additionally sets
#     stateInfo.virtual when a stateInfo target is installed);
#   - storageQualifiers -> STORAGE, cvQualifiers -> CVQUAL;
#   - blockTokens -> BLOCK, and the lexer enters the 'block' state;
#   - stmtTokens / sipDirectives: only when the word is directly preceded by
#     '%'.  Statements become SIPSTMT (state 'sipStmt'); directives use their
#     own name as the type and some of them select the state used to lex
#     their argument.
# Anything else is returned as a plain ID.
def t_ID(t):
    r'[A-Za-z_][\w_.]*'
    word = t.value
    lexer = t.lexer

    def follows_percent():
        # Index of the character just before the token; the arithmetic
        # assumes lexer.lexpos is the position just past the token.
        before = lexer.lexpos - len(word) - 1
        # A token at offset 0 has no preceding character; without this guard
        # index -1 would wrap around to the last character of the input.
        return before >= 0 and lexer.lexdata[before] == '%'

    if word in edges or word in cppScalarTypes or word in accessSpecifiers:
        t.type = word
    elif word in storageQualifiers:
        t.type = "STORAGE"
    elif word in functionQualifiers:
        t.type = word
        if word == 'virtual' and stateInfo:
            stateInfo.virtual = True
    elif word in cvQualifiers:
        t.type = "CVQUAL"
    elif word in blockTokens:
        t.type = 'BLOCK'
        lexer.begin('block')
    elif word in stmtTokens:
        if follows_percent():
            t.type = 'SIPSTMT'
            lexer.begin('sipStmt')
    elif word in sipDirectives:
        if follows_percent():
            if word in ['Import', 'OptionalInclude']:
                lexer.begin('filename')
            elif word in ['Module', 'API', 'Include']:
                lexer.begin('keypairs')
            elif word == 'DefaultEncoding':
                lexer.begin('string')
            elif word in ['DefaultMetatype', 'DefaultSupertype']:
                lexer.begin('dottedname')
            t.type = word
    elif word == 'throw':
        t.type = word
    return t
0372 
# Capture inline documentation: a /** ... */ comment is passed to
# stateInfo.setDoc() when a stateInfo target is installed.  The comment itself
# produces no token; the line counter is advanced past it.
def t_ANY_DO2COMMENT(t):
    r'/\*\*(.|\n)*?\*/'
    text = t.value
    if stateInfo:
        stateInfo.setDoc(text)
    t.lexer.lineno += text.count('\n')
0379 
# A "///" comment (to end of line) is passed to stateInfo.setDoc() when a
# stateInfo target is installed; it produces no token.
def t_ANY_DO2CPPCOMMENT(t):
    r'///.*'
    if not stateInfo:
        return None
    stateInfo.setDoc(t.value)
    return None
0384 
# Comments: an ordinary /* ... */ comment (not starting with /**) is returned
# as a CCOMMENT token.
def t_ANY_comment(t):
    r'/\*[^\*](.|\n)*?\*/'
    # Advance the lexer's line counter, as every other rule in this file
    # does.  Incrementing t.lineno would only change this one token and leave
    # the line numbers of everything after the comment too low.
    t.lexer.lineno += t.value.count('\n')
    t.type = 'CCOMMENT'
    return t
0391 
# A "//" comment through the end of its line (newline included) is returned
# as a LINECOMMENT token; the line counter is advanced for the newline.
def t_ANY_cppcomment(t):
    r'//[^\n]*\n'
    lexer = t.lexer
    lexer.lineno = lexer.lineno + t.value.count('\n')
    t.type = 'LINECOMMENT'
    return t
0397     
# Preprocessor directive (ignored): the text from '#' through the end of the
# line is consumed and no token is returned.
def t_preprocessor(t):
    r'\#(.)*?\n'
    # The match ends at its single newline.  The lexer's counter is advanced
    # (not t.lineno, which belongs to this discarded token) so that later
    # tokens get correct line numbers.
    t.lexer.lineno += 1
0402 
# Error handler for every state: reports the first offending character on
# standard output and skips it so that lexing continues.
def t_ANY_error(t):
    offending = t.value[0]
    message = "Illegal character %s" % repr(offending)
    print(message)
    t.lexer.skip(1)
0406 
0407 
# Build the lexer from the states, token list and t_* rules defined above in
# this module.
sipLexer = lex.lex ()
0409 
# for collecting inline docs
# The token rules test this for truthiness before using it: the doc-comment
# rules call stateInfo.setDoc(text) and t_ID sets stateInfo.virtual = True.
# While it is None those steps are skipped.
stateInfo = None

def setStateInfoTarget (si):
    """Install `si` as the module-level `stateInfo` object used by the token rules.

    Judging from its uses in this module, `si` needs a `setDoc(text)` method
    and a writable `virtual` attribute; passing None turns collection off.
    """
    global stateInfo
    stateInfo = si