# File indexing completed on 2024-05-12 16:09:14
#!/usr/bin/env python
# -*- Coding:utf-8 -*-

# SPDX-FileCopyrightText: 2013 Sven Brauch <svenbrauch@googlemail.com>
# SPDX-License-Identifier: GPL-2.0-or-later
# The script output is not copyrighted, use it for whatever you want.

# WARNING: This script does things which can cause bad stuff to happen
# to your system in case you execute it on a module which has been
# engineered to be malicious.
# I thus recommend to run this script as a user with minimal privileges
# (i.e. not as yourself), or even in a chroot.
# In any case, I'm not responsible for any damage caused by this script.

# This script will dump a SINGLE MODULE to a python "header" file.
# It will read one module, and give you one output file.
# Any submodules of the imported object (anything with type "module")
# will be ignored by this script, if you want to dump those,
# you will have to manually (or with a script) generate a directory
# structure and re-run this script.

import re
import traceback

import sys
import types
import inspect
import importlib
import builtins


def debugmsg(message):
    """Write `message` followed by a newline to stderr and flush."""
    sys.stderr.write(message + "\n")
    sys.stderr.flush()


def structseq_to_py(seq, name="INSERT_NAME"):
    """Turns a "structseq" object to a python pseudoclass.

    The text between the first "(" and the first ")" of str(seq) is split at
    commas, and every "key=value" item is printed as a class attribute.
    """
    sseq = str(seq)
    sseq = '('.join(sseq.split('(')[1:])
    sseq = ')'.join(sseq.split(')')[:1])
    print("class {0}:".format(name))
    for item in sseq.split(','):
        item = item.strip()
        key, value = item.split('=')
        print(indent("{0} = {1}".format(key, value)))


def indent(code, depth=4):
    """Return `code` with every line prefixed by `depth` spaces."""
    code = code.split('\n')
    code = [" "*depth + line for line in code]
    return '\n'.join(code)


def clearIndent(code):
    """Return `code` with leading and trailing whitespace stripped per line."""
    assert isinstance(code, str)
    code = code.split('\n')
    code = [line.strip() for line in code]
    return '\n'.join(code)


def syntaxValid(code):
    """Return True if `code` compiles in 'exec' mode without a SyntaxError."""
    try:
        compile(code, "<no file>", 'exec')
    except SyntaxError:
        return False
    return True


def sanitize(expr):
    """Rewrite a docstring fragment so it is more likely to be valid python.

    Applies the textual replacements below one after another (the order of
    the dict matters, e.g. "self," is handled before "self"), replaces every
    ".<digit>" with "_", and gives dangling "=" signs a "[]" value.
    """
    assert isinstance(expr, str)
    replace = {
        '*':'', '[':'', ']':'',
        'from':'_from', 'class':'_class', '-':'_', 'lambda':'_lambda', "raise":"_raise",
        '\\':'ESC', ' ':'', "<":'"', ">":'"', "self,":"", "self":"",
        ",,":",", '...':'more_args', '+':"plus"
    }
    result = expr
    for before, after in replace.items():
        result = result.replace(before, after)
    result = re.sub(r"\.\d", "_", result)
    result = result.replace("=,", "=[],").replace("=)", "=[])")
    return result


def strict_sanitize(expr):
    """Like sanitize(), but additionally drop everything that cannot be part
    of a plain name (brackets, quotes, commas, ...).

    An empty result becomes "_", one trailing "." is removed, and a leading
    digit is prefixed with "_".
    """
    assert isinstance(expr, str)
    expr = sanitize(expr)
    forbidden = ["=()", '(', ')', '"', "'", " ", ",", "|", "%", '#', '{', '}']
    for char in forbidden:
        expr = expr.replace(char, "")
    if len(expr) == 0:
        expr = "_"
    if expr[-1] == '.':
        expr = expr[:-1]
    if expr == ".":
        return "None"
    if len(expr) > 0 and expr[0].isdigit():
        expr = "_" + expr
    return expr


def isSpace(char):
    """Return True if `char` is a blank or a tab."""
    return char == ' ' or char == '\t'


def removeAtCorner(string, char, direction):
    """Strip blanks/tabs and then a run of `char` from one end of `string`.

    direction '>' works on the beginning of the string (scanning forwards),
    direction '<' on the end (scanning backwards). Returns an empty string
    if nothing but whitespace and `char` is found.
    """
    assert direction in ['<', '>']
    if direction == '>':
        iterator = range(0, len(string))
        # Keep everything from the first character that is kept onwards.
        # (This used to be s[a-1:], which kept the last stripped character,
        # or -- for a == 0 -- only the last character of the string.)
        def r(s, a): return s[a:]
    else:
        iterator = range(len(string) - 1, -1, -1)
        def r(s, a): return s[:a+1]

    atBeginning = True
    for i in iterator:
        if isSpace(string[i]) and atBeginning:
            continue
        elif string[i] == char:
            atBeginning = False
        else:
            return r(string, i)
    return str()


# Type names commonly found in docstrings, mapped to the python name to emit.
likely_substitutions = {
    "integer": "int",
    "string": "str",
    "long": "int",
    "dictionary": "dict",
    "double": "float",
    "scalar": "float",
    "array_like": "ndarray"
}


def do_type_subst(t):
    """Return the entry of likely_substitutions for `t`, or `t` itself."""
    if t in likely_substitutions:
        return likely_substitutions[t]
    return t


def get_indent(string):
    """Count the leading blanks/tabs of the first line of `string`.

    A line that consists of whitespace only (or is empty) counts as 0.
    """
    string = string.split("\n")[0]
    indent = 0
    for char in string:
        if char in [' ', '\t']:
            indent += 1
        else:
            return indent
    return 0


def remove_indent(string):
    """Remove the first line's indentation from every line of `string`.

    At most as many leading whitespace characters as the first line has are
    removed per line; lines without any non-whitespace content inside that
    range are dropped. Every kept line is terminated with a newline.
    Non-string input is returned unchanged.
    """
    if type(string) == str:
        string = string.split("\n")
        max_remove_indent = get_indent(string[0])
        result = []
        for line in string:
            for offset in range(0, len(line)):
                # ">=" (not ">"): stop exactly at the first line's indent
                # instead of eating one extra column.
                if line[offset] not in [' ', '\t'] or offset >= max_remove_indent:
                    result.append(line[offset:] + "\n")
                    break
        return ''.join(result)
    else:
        return string


def guess_return_type_from_synopsis(synopsis, root):
    """Guess a return value expression from free text.

    For every occurrence of "return" (case-insensitive) the following ~60
    characters are scanned word by word for something that looks like a
    type: a few well-known words, a class found in root.module, or a class
    found in builtins. "list"/"iterable"/"set" are remembered as container
    and wrapped around the next type found.

    Returns a string such as "int()", "list([int()])" or "None".
    """
    container = ""

    def apply_container(value):
        if len(container) > 0:
            return "{0}([{1}])".format(container, value)
        return value

    def is_class_in(namespace, name):
        # The previous check had a misplaced parenthesis
        # (type(getattr(...) == type(object))) and was therefore always true.
        return isinstance(getattr(namespace, name, None), type)

    for item in re.finditer("return", synopsis, re.I):
        scan = synopsis[item.start():item.end()+60]
        words = scan.split()
        if "ndarray" in words or 'array_like' in words or 'array_type' in words:
            # hack to make "complex ndarray" work properly
            return "ndarray()"
        for word in words:
            if word.find('.') != -1 and word != '...':
                break # end of sentence -- stop
            word = word.replace(',', '')
            if not word:
                # a lone "," -- nothing to look at (word[-1] would fail)
                continue
            if word in ["none", "None"]:
                return "None"
            if word in ["True", "False", "true", "false", "bool", "boolean"]:
                return apply_container("bool()")
            if word in ["dict", "dictionary"]:
                return "dict()"
            if word in ["string", "str", "represenation"]:
                return "str()"
            if word in ["list", "iterable"]:
                container = "list"
                continue
            if word in ["set"]:
                container = "set"
                continue
            if word in ["number", "int", "integer"]:
                return apply_container("int()")
            if word in ["float", "ratio", "fraction"]:
                return apply_container("float()")
            if is_class_in(root.module, word):
                return apply_container(word + "()")
            if word[-1] == "s" and is_class_in(root.module, word[:-1]):
                # plural form, "list of ints"
                return apply_container(word[:-1] + "()")
            if is_class_in(builtins, word):
                return apply_container(word + "()")
        # NOTE(review): the source this was restored from had lost its
        # indentation; this check is assumed to run once per "return" match.
        if len(container) > 0:
            return container + "()"
    return "None"


def parse_numpy_like_docstring(docstring, funcname, root, needSelfArg=False):
    """Extract an argument list and return value from a numpy-style docstring.

    Looks for a "Parameters" section (terminated by the "---" underline of
    the following section) and a "Returns" section. If no parameter section
    is found, the first docstring line is tried as "funcname(a, b) -> x".

    Returns a tuple (argument list string, return value expression string).
    """
    selflist = ["self"] if needSelfArg else []
    fallback = ("self" if needSelfArg else "", "None")
    if type(docstring) != str:
        return fallback

    paramListBegin = paramListEnd = False
    returnTypeBegin = False
    atPartBeginning = 2 # line breaks to skip: section title and its underline
    returnType = "None"
    for offset in range(0, len(docstring)):
        if docstring.startswith("Parameters", offset):
            paramListBegin = offset
        if paramListBegin is not False and docstring[offset] == "\n" and atPartBeginning != 0:
            atPartBeginning -= 1
        if docstring.startswith("---", offset) and atPartBeginning == 0:
            paramListEnd = offset
            break
        if docstring.startswith("Returns", offset):
            returnTypeBegin = offset
    # An offset that was never found is still False here and slices like 0,
    # which yields an empty parameter section.
    relevantPart = docstring[paramListBegin:paramListEnd].split("\n")[2:]

    if returnTypeBegin is not False:
        try:
            line = docstring[returnTypeBegin:].split('\n')[2]
            ret = line.split(' : ')[1]
            if ret.find(' or ') != -1:
                # unsure return type
                # (must be a list: a map object cannot be sliced)
                returnTypes = [strict_sanitize(item.split(' ')[0]) for item in ret.split(' or ')]
                returnType = ''.join(["{0}() if False else ".format(do_type_subst(t)) for t in returnTypes[:-1]]) \
                             + do_type_subst(str(returnTypes[-1])) + "()"
            else:
                if 'ndarray' in ret.split() or 'array_like' in ret.split() or 'array_type' in ret.split():
                    returnType = "ndarray()"
                else:
                    returnTypeLine = ret.split(' ')[0].split(',')[0]
                    returnType = do_type_subst(strict_sanitize(returnTypeLine)) + "()"
        except IndexError:
            returnType = guess_return_type_from_synopsis(docstring[returnTypeBegin:], root)

    if len(relevantPart):
        firstIndent = get_indent(relevantPart[0])
        parameter_name_list = []
        for line in relevantPart:
            # only lines indented like the first one declare a parameter
            if get_indent(line) != firstIndent:
                continue
            s = line.split(' : ')
            if len(s) != 2:
                continue
            # TODO extract the type (s[1]) and the parameter documentation,
            # and display it in some way... or not
            parameter_name = strict_sanitize(s[0])
            if parameter_name.find('...') != -1:
                parameter_name = 'more'
            parameter_name = parameter_name.replace('`', '')
            parameter_name_list.append(parameter_name)
        return ', '.join(selflist + parameter_name_list), do_type_subst(returnType)

    # No parameter section: try "module.funcname(a, b) -> x" on the first line.
    firstType = docstring.split("\n")[0].split('.')[-1]
    if firstType.find(funcname) == -1:
        return fallback
    firstType = firstType.split('->')[0]
    firstType = firstType.split('(')[1:]
    firstType = ')'.join('('.join(firstType).split(')')[:-1])
    # Skip "..." and blank items; a blank item (as in "f()") would otherwise
    # be sanitized into a bogus "_" parameter.
    cleanedParamList = [item for item in firstType.split(',')
                        if item.strip() and item.find('...') == -1]
    return ', '.join(selflist + [strict_sanitize(x) for x in cleanedParamList]), "None"


def parse_synopsis(funcdef, original, root, needSelfArg=False):
    """Parse a function description in the following format:
    module.func(param1, param2, [optional_param1 = default1, [optional_param2 = default2]]) -> return_type
    This tries to be as error-tolerant as possible in order to convert everything into a valid parameter list.

    `funcdef` is the synopsis line, `original` the text used to guess the
    return type if the synopsis does not name one. Returns a tuple
    (argument list string, return value expression string)."""
    # first, take the parts before and after the arrow:
    assert isinstance(funcdef, str)
    funcdef = funcdef.replace("<==>", " -> ")
    s = funcdef.split(' -> ')
    definition = s[0]
    returnType = s[1] if len(s) > 1 else "None"
    # Sometimes, people do fancy stuff in the return type, like "... -> ndarray or None if arg is False"
    # Thus, we only use the first word... well.
    returnType = strict_sanitize(returnType.split(' ')[0])
    if returnType in likely_substitutions:
        returnType = likely_substitutions[returnType]
    if returnType != 'None':
        returnType += "()"
    if returnType == 'None' or returnType == '_()':
        returnType = guess_return_type_from_synopsis(original, root)
    # Okay, now the fun part: parse the parameter list
    inParamList = False
    brackets = 0
    paramList = ""
    for char in definition:
        if char == '(' and not inParamList:
            inParamList = True
        elif char == '(':
            brackets += 1
        if char == ')' and brackets > 0:
            brackets -= 1
        elif char == ')' and inParamList:
            break
        if inParamList and char not in '()':
            paramList += char
    paramList = paramList.split(',')
    resultingParamList = []
    atDefault = False
    for param in paramList:
        defaultValue = None
        # extract the name of the param
        param = param.replace(' ', '').replace('\t', '')
        if not param:
            # nothing between the commas / parentheses (e.g. "f()");
            # strict_sanitize would turn this into a bogus "_" parameter
            continue
        # check for default values
        if removeAtCorner(param, '[', '>') != removeAtCorner(param, ' ', '>') or param.find('=') != -1:
            # default parameter list starts or continues with this parameter
            atDefault = True
        if atDefault:
            defaultValue = "None"
        if param.find('=') != -1:
            # default value was provided; clean trailing "[" and "]" chars
            defaultValue = removeAtCorner(removeAtCorner(param.split('=')[1], ']', '<'), '[', '<')
            param = param.split('=')[0]
        if len(str(defaultValue)) == 0 or str(defaultValue).isspace():
            # just write anything, otherwise it's syntactically invalid
            defaultValue = "None"
        if removeAtCorner(param, '[', '<') != removeAtCorner(param, ' ', '<'):
            # default parameter list starts or continues after this parameter
            atDefault = True
        param = strict_sanitize(param)
        if param == '':
            continue
        if defaultValue:
            resultingParamList.append("{0}={1}".format(param, sanitize(defaultValue)))
        else:
            resultingParamList.append(param)
    if needSelfArg:
        # we're in a class, make sure there's a "self"
        if len(resultingParamList) == 0 or ( [resultingParamList[0].find(x) for x in ["self", "cls"]] == [-1, -1] ):
            resultingParamList.insert(0, "self")
    return ', '.join(resultingParamList), returnType


class ModuleDumper:
    """Prints a python "header" for all members of one module to stdout.

    Also serves as the shared output sink ("root") for the other dumpers,
    which emit their code through emit() at the current indentation depth.
    """

    def __init__(self, module, startIndent=0, special_hints=None):
        self.module = module
        self.code = str()
        self.indentDepth = startIndent
        # maps function name -> dict of hints; only the "returns" key is
        # read (by FunctionDumper). A fresh dict per instance avoids sharing
        # one mutable default between dumpers.
        self.special_hints = dict() if special_hints is None else special_hints

    def increaseIndent(self):
        self.indentDepth += 4

    def decreaseIndent(self):
        self.indentDepth -= 4
        if self.indentDepth < 0:
            self.indentDepth = 0

    def emit(self, code):
        """Print `code`, indented by the current indentation depth."""
        print(indent(code, self.indentDepth))

    def dump(self):
        """Dump every member reported by inspect.getmembers() for the module."""
        debugmsg("Processing module {0}".format(self.module.__name__))
        for member, value in inspect.getmembers(self.module):
            dumper = dumperForObject(value, member, self)
            dumper.dump()


class ScalarDumper:
    """Emits "name = type()" for a plain value ("name = None" for None)."""

    def __init__(self, name, value, root):
        self.name = name
        self.value = value
        self.root = root

    def dump(self):
        value = type(self.value).__name__ + "()" if self.value is not None else "None"
        if value == 'module()':
            # numpy fix
            return
        self.root.emit("{0} = {1}".format(self.name, value))


def pick_better_return_value(v1, v2):
    """Return `v1` unless it is the uninformative "None"; then return `v2`."""
    if v1 == "None":
        return v2
    return v1


def pick_better_arglist(s1, s2):
    """Return the argument list with more arguments.

    The number of commas is compared; on a tie `s2` wins, except if it is
    empty while `s1` is not.
    """
    if s1.count(',') > s2.count(','):
        return s1
    if s1.strip() and not s2.strip():
        return s1
    return s2


# default values which can be written into the output as they are
goodValues = [True, False, None]
# A tuple on purpose: a map() object would be exhausted by the first
# membership test.
goodTypes = (int, float)


class FunctionDumper:
    """Emits a "def" stub (signature, docstring, return statement) for a
    function, built-in or other callable."""

    def __init__(self, function, root):
        self.function = function
        self.root = root
        assert isinstance(self.root, ModuleDumper)

    def dump(self):
        # a non-zero indentation depth means we are inside a class body
        inClass = self.root.indentDepth > 0
        try:
            arguments = inspect.getfullargspec(self.function)
            # "no default values" is reported as None, not as empty tuple
            defaults = arguments.defaults or ()
            # the defaults belong to the *last* len(defaults) arguments
            firstDefaultIndex = len(arguments.args) - len(defaults)
            arglist = list()
            for index, argument in enumerate(arguments.args):
                if index < firstDefaultIndex:
                    # no default value -> normal argument
                    arglist.append(argument)
                else:
                    # there's a default value
                    rawDefaultValue = defaults[index - firstDefaultIndex]
                    if type(rawDefaultValue) == type(object):
                        defaultValue = strict_sanitize(str(rawDefaultValue)) + "()"
                    elif rawDefaultValue in goodValues or type(rawDefaultValue) in goodTypes:
                        defaultValue = str(rawDefaultValue)
                    else:
                        defaultValue = '"{0}"'.format(str(rawDefaultValue).replace("\n", " "))
                    if len(defaultValue) == 0 or defaultValue.isspace():
                        defaultValue = "None"
                    arglist.append("{0}={1}".format(argument, defaultValue))
            if inClass:
                # we're in a class, make sure there's a "self"
                if len(arglist) == 0 or ( arglist[0].find("self") == -1 and arglist[0].find("cls") == -1 ):
                    arglist.insert(0, "self")
            arglist = ', '.join(arglist)
        except TypeError:
            # not a python function, can't inspect it. try guessing argspec from docstring
            arglist = None

        # The docstring is always parsed: it is the only source for the
        # return value, and for the arguments if inspection failed above.
        # Everything has a defined fallback, so one failing parser does not
        # discard the result of the other one.
        fallbackArglist = "self" if inClass else ""
        docstring = str()
        synArglist, returnValue = fallbackArglist, "None"
        try:
            fullDocstring = self.function.__doc__
            docstring = fullDocstring.split('\n')[0] if fullDocstring else str()
            synArglist2, returnValue2 = fallbackArglist, "None"
            try:
                synArglist, returnValue = parse_synopsis(docstring, str(fullDocstring), self.root, inClass)
            except Exception as e:
                debugmsg(format(e))
            try:
                synArglist2, returnValue2 = parse_numpy_like_docstring(str(fullDocstring),
                                                                       self.function.__name__, self.root,
                                                                       inClass)
            except Exception as e:
                debugmsg(format(e))
            synArglist = pick_better_arglist(synArglist, synArglist2)
            returnValue = pick_better_return_value(returnValue, returnValue2)
        except Exception as e:
            debugmsg("  Warning: Function argument extraction failed: {0}".format(e))
            debugmsg("   * Traceback follows, but the error was ignored since it is not fatal.")
            traceback.print_exc(file=sys.stderr)
            synArglist = fallbackArglist
            returnValue = "None"
        if docstring.find("Not implemented (virtual attribute)") != -1:
            # numpy hack
            return
        if arglist is None:
            arglist = synArglist
        try:
            funcname = self.function.__name__
        except AttributeError:
            # callable without a name, nothing sensible to emit
            return
        if funcname in self.root.special_hints:
            hints = self.root.special_hints[funcname]
            if "returns" in hints:
                returnValue = hints["returns"]
        if funcname[0].isdigit():
            funcname = '_' + funcname
        if funcname.startswith('__'):
            return
        self.root.emit("def {0}({1}):".format(strict_sanitize(funcname), arglist))
        self.root.increaseIndent()
        self.root.emit('"""{0}"""'.format(str(self.function.__doc__).replace('"""', '___')))
        self.root.emit("return {0}".format(returnValue))
        self.root.decreaseIndent()


class ClassDumper:
    """Emits a class statement followed by stubs for the class members."""

    def __init__(self, klass, root):
        self.klass = klass
        self.root = root
        assert isinstance(self.root, ModuleDumper)

    def dump(self):
        debugmsg("Generating documentation for class {0}".format(self.klass.__name__))
        self.root.emit("class {0}:".format(self.klass.__name__))
        self.root.increaseIndent()
        for member, value in inspect.getmembers(self.klass):
            if type(value) == type:
                # members which are classes themselves are not dumped
                continue
            dumper = dumperForObject(value, member, self.root)
            dumper.dump()
        self.root.decreaseIndent()


# exact type of an object -> dumper class responsible for it
dumpers = {
    types.FunctionType: FunctionDumper,
    types.BuiltinFunctionType: FunctionDumper,
    types.BuiltinMethodType: FunctionDumper,
    type: ClassDumper
}
if hasattr(types, "ClassType"):
    dumpers[types.ClassType] = ClassDumper # python 2


def dumperForObject(object, memberName, root):
    """Return the dumper instance to use for `object`.

    Types registered in `dumpers` get their dumper, any other object with a
    __call__ attribute is treated as a function, the rest as scalar.
    """
    dumperClass = dumpers.get(type(object))
    if dumperClass is not None:
        return dumperClass(object, root)
    if hasattr(object, "__call__"):
        return FunctionDumper(object, root)
    return ScalarDumper(memberName, object, root)


if __name__ == '__main__':
    # The last argument is the module name. sys.argv[-1] always exists (it is
    # the script itself if no argument was given), so the argument count has
    # to be checked explicitly.
    argscount = len(sys.argv)
    if argscount < 2:
        debugmsg("Usage: introspect.py <python_module_name>")
        sys.exit(1)
    # NOTE(review): this adds sys.argv[1] .. sys.argv[argscount-3] to the
    # search path, i.e. the argument directly in front of the module name is
    # not added. This looks like an off-by-one, but was kept as it is since
    # the callers of this script are not visible here -- please confirm.
    for arg in range(1, argscount-2):
        sys.path.insert(1, sys.argv[arg])
    dumper = ModuleDumper(importlib.import_module(sys.argv[-1]))
    dumper.dump()
    debugmsg("All done -- looks good so far.")