# -*- coding: utf-8 -*-
#     Copyright 2007-8 Jim Bublitz <jbublitz@nwinternet.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

import re
import ply.lex as lex
# handles the evaluation of conditionals
from .exprparser import ExpressionParser

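# Module-level state shared by the token rules below:
#   newtext   - output lines accumulated as the input is scanned
#   macros    - (compiled regex, replacement, name) substitution tuples
#   bitBucket - True while inside a conditional block being discarded
#   sentinel  - True when the preceding conditional looked like an
#               include-guard test, so the matching #define is dropped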
newtext   = []
macros    = []
bitBucket = False
sentinel  = False

preprocessor_tokens = ['cond', 'else', 'endif', 'include', 'define', 'undef', 'line', 'error_directive', 'pragma', 'warning']
tokens = preprocessor_tokens + ['anyline']

values = {}
evaluate = ExpressionParser ().parse

# Completely ignored characters
t_ANY_ignore           = ' \t\x0c'

def stripComment (s):
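    """
    Split a preprocessor line into (code, comment).

    Returns the text preceding any '/*' or '//' comment together with
    the comment itself; if the line has no comment, returns (s, '').
    """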
    pos1 = s.find ('/*')
    pos2 = s.find ('//')
    if pos1 >= 0 and pos2 >= 0:
        pos = min (pos1, pos2)
    elif pos1 < 0 and pos2 < 0:
        pos = -1
    else:
        pos = max (pos1, pos2)

    if pos >= 0:
        return s [:pos].strip (), s [pos:].strip ()
    else:
        return s, ''

def t_cond (t):
    r'\#\s*(?P<ifType>ifdef\s|ifndef\s|if\s|elif\s)\s*(?P<cond>.*?)\n'

    # All conditionals that perform a test are handled here
    global newtext
    ifType = t.lexer.lexmatch.group ('ifType').strip ()
    condition, comment = stripComment (t.lexer.lexmatch.group ('cond'))

    # #if/#elif look for True/False, others for definition only
    # #if defined - 'defined' is handled as an operator by the
    # expression parser which evaluates the conditional
    if ifType in ['if', 'elif']:
        mode = 'calc'
    else:
        mode = 'def'

    ifCondition = evaluate (condition, mode, values)

    global bitBucket, sentinel
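    # Discard the block when the test fails - or, for #ifndef, when it
    # succeeds. Note that bitBucket is a single flag rather than a stack,
    # so nested conditionals are not tracked independently.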
    bitBucket = ((not ifCondition) and (ifType != 'ifndef')) or (ifCondition and (ifType == 'ifndef'))

    # remove #define <sentinel>?
    sentinel = not bitBucket and ('_h' in condition or '_H' in condition)

    # A multiline comment could begin on a preprocessor line
    # that's being eliminated here
    if bitBucket and comment:
        newtext.append (comment + '\n')
    else:
        newtext.append ('\n')

    t.lexer.lineno += 1

def t_else (t):
    r'\#\s*else(.*?)\n'  # any trailing text is assumed to be a comment
    global bitBucket, newtext
    bitBucket = not bitBucket
    t.lexer.lineno += 1
    newtext.append ('\n')

def t_endif (t):
    r'\#\s*endif(.*?)\n'
    global bitBucket, newtext
    bitBucket = False
    t.lexer.lineno += 1
    newtext.append ('\n')

def t_include (t):
    r'\#\s*include.*?\n'
    global newtext
    t.lexer.lineno += 1
    newtext.append ('\n')

def t_line (t):
    r'\#\s*line.*?\n'
    global newtext
    t.lexer.lineno += 1
    newtext.append ('\n')

def t_error_directive (t):
    r'\#\s*error.*?\n'
    global newtext
    t.lexer.lineno += 1
    newtext.append ('\n')

def t_error (t):
    # PLY reserves the name t_error for the lexer's error handler, so the
    # #error directive is handled by t_error_directive above
    t.lexer.skip (1)

def t_pragma (t):
    r'\#\s*pragma.*?\n'
    global newtext
    t.lexer.lineno += 1
    newtext.append ('\n')

def t_warning (t):
    r'\#\s*warning.*?\n'
    global newtext
    t.lexer.lineno += 1
    newtext.append ('\n')

def t_undef (t):
    r'\#\s*undef\s*(?P<item>.*?)\n'
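    # Drop both the stored value and any substitution rule registered
    # for this name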
    global macros, values, newtext
    item = t.lexer.lexmatch.group ('item').strip ()
    if item in values:
        macros = [macro for macro in macros if len (macro) == 2 or macro [2] != item]
        del values [item]
    t.lexer.lineno += 1
    newtext.append ('\n')

def t_define (t):
    r'\#\s*define\s*(?P<first>\S+)\s*?(?P<second>[^\n]*?)\n'
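    # 'first' captures the macro name (possibly with the opening of a
    # parameter list); 'second' captures the replacement text, if any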
    global sentinel, values, macros, newtext
    a = t.lexer.lexmatch.group ('first')
    b = t.lexer.lexmatch.group ('second')

    # Append any continuation lines
    newlines = 1
    start = t.lexer.lexpos
    if b and b.endswith ('\\'):
        data = t.lexer.lexdata
        for i in range (start, len (data)):
            if data [i] == '\n':
                t.lexer.lineno += 1
                newlines += 1
            if data [i] == '\n' and data [i - 1] != '\\':
                break
        t.lexer.lexpos = i + 1
        b += data [start:t.lexer.lexpos].replace ('\\\n', ' ')

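    # Function-like macro: if the parameter list was split between the
    # 'first' and 'second' groups, rejoin the name up to the closing paren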
    if '(' in a and ')' not in a:
        pos = b.find (')')
        if pos < 0:
            return
        a += b [:pos + 1]
        b = b [pos + 1:]

    # remove #define <sentinel>
    sentinel = sentinel and not b and ('_h' in a or '_H' in a)
    if not sentinel:
        if not b or '(' in a:
            values [a] = ''
            macros.insert (0, (re.compile (a), '', a))
        else:
            values [a] = b
            macros.insert (0, (re.compile (a), b.strip (), a))

    sentinel = False

    newtext.append (newlines * '\n')
    t.lexer.lineno += 1

def t_anyline (t):
    r'[^\n]*?\n(([^#\n][^\n]*\n)|\n)*'
    # Process anything that's not a preprocessor directive.
    #
    # Apply all #define macros to each line. Code that has
    # been #if'd out (bitBucket == True) is replaced by
    # a single newline for each line removed.
    global sentinel, newtext
    sentinel = False
    if not bitBucket:
        line = t.value
        for m in macros:
            line = m [0].sub (m [1], line)
        newtext.append (line)
        t.lexer.lineno += line.count ('\n')
    else:
        c = t.value.count ('\n')
        for x in range (c):
            newtext.append ('\n')
        t.lexer.lineno += c

# lex.lex () must be called HERE - after all of the token rules are defined
ppLexer = lex.lex (debug=0)


def preprocess (text, global_values=None, global_macros=None):
    r"""
    Preprocess C/C++ header file text.

    Preprocesses .h files - performs #define substitutions and
    evaluates conditionals to include/exclude code. No
    substitutions are performed on preprocessor lines (any
    line beginning with '#'). Global #defines are applied
    LAST, so they override any local #defines.

    All C preprocessor code is stripped and, along with any
    lines eliminated conditionally, replaced with newlines
    so that error messages still refer to the correct line in
    the original file.

    Arguments:
    text -- The text to process.
    global_values -- Dict mapping string variable names to values.
    global_macros -- List of tuples. The first value in a tuple is a
                     regular expression object. The second is the
                     replacement string, which may contain re module
                     back references.

    Returns the processed string.
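
    A minimal illustration (each stripped directive line is replaced by
    a bare newline, so line numbering is preserved):

    >>> preprocess ('#define N 3\nint x[N];\n')
    '\nint x[3];\n'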
    """
    global newtext, bitBucket, macros, values, sentinel
    newtext   = []
    bitBucket = False
    sentinel  = False
    macros    = list (global_macros) if global_macros else []
    values    = dict (global_values) if global_values else {}

    if not text.endswith ('\n'):
        text = text + '\n'
    ppLexer.input (text)
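    # None of the token rules return a value, so this single token () call
    # scans the entire input; newtext is filled in as a side effect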
    ppLexer.token ()
    #print(newtext)
    #return "".join (fixDoc (newtext))
    return "".join (newtext)

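# Illustrative call (hypothetical names and values): each global_macros
# entry is a (compiled regex, replacement) pair, applied after any local
# #defines:
#
#   preprocess (headerText,
#               global_values = {'QT_VERSION': '0x040500'},
#               global_macros = [(re.compile ('Q_SLOTS'), 'slots')])
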
def fixDoc (textList):
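    """
    Convert runs of '///' (and '///<') doxygen comments into
    '/**' ... '*/' (and '/**<' ... '*/') blocks, and blank out
    '/////...' separator lines. Currently unused - see the
    commented-out call in preprocess above.
    """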
    doReplace = False
    doBackReplace = False
    nLines    = len (textList)
    for i in range (nLines):
        if i >= nLines - 1:
            break

        if textList [i].startswith ('/////'):
            textList [i] = '\n'
            continue

        haveBackCmt = textList [i].find ('///<') >= 0
        haveCmt = textList [i].find ('///') >= 0 and not haveBackCmt
        if haveBackCmt:
            if not doBackReplace:
                doBackReplace = textList [i + 1].strip ().startswith ('///<')
                if doBackReplace:
                    textList [i] = textList [i].replace ('///<', '/**<')
            else:
                textList [i] = textList [i].replace ('///<', '*')
        elif doBackReplace:
            textList.insert (i, '*/\n')
            doBackReplace = False

        if not haveBackCmt and haveCmt:
            if not doReplace:
                doReplace = textList [i + 1].strip ().startswith ('///')
                if doReplace:
                    textList [i] = textList [i].replace ('///', '/**')
            else:
                textList [i] = textList [i].replace ('///', '*')
        elif doReplace:
            textList.insert (i, '*/\n')
            doReplace = False

    return textList

if __name__ == '__main__':
    text = """#define foo bar"""
    print (preprocess (text))