# Warning, file /education/labplot/admin/asan_symbolize.py was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
"""ASan symbolization script.

Reads a log (stdin by default), finds stack-trace lines of the form
``#N 0xADDR (binary+0xOFFSET)`` and rewrites each of them with the function
name and source location reported by a chain of symbolizers
(Breakpad symbol file -> llvm-symbolizer -> addr2line/atos).  Every other
line is echoed unchanged.
"""
import argparse
import bisect
import getopt
import os
import re
import subprocess
import sys

# Maps a binary path to the ChainSymbolizer used for it.
symbolizers = {}
# When true, the commands and queries sent to helper tools are printed.
DEBUG = False
# Set by -d/--demangle; forwarded to llvm-symbolizer and addr2line.
demangle = False
# Set by -c; prepended to the 'addr2line' tool name.
binutils_prefix = None
# Set by -s; prepended to binary names by sysroot_path_filter().
sysroot_path = None
binary_name_filter = None
# Positional command-line patterns consumed by fix_filename().
fix_filename_patterns = None
logfile = sys.stdin
# When false, a failure of the Breakpad/LLVM symbolizers is fatal instead of
# falling back to addr2line/atos.
allow_system_symbolizer = True


# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Return *file_name* with uninteresting path prefixes removed.

    Each pattern in ``fix_filename_patterns`` (regular expressions) is cut
    together with everything before it.  Locations inside ``asan_*.cc`` are
    collapsed to ``_asan_rtl_`` and ``crtstuff.c:0`` becomes ``???:0``.
    """
    if fix_filename_patterns:
        for path_to_cut in fix_filename_patterns:
            file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
    return file_name


def sysroot_path_filter(binary_name):
    """Return *binary_name* prefixed with the global ``sysroot_path``."""
    return sysroot_path + binary_name


def guess_arch(addr):
    """Guess the architecture from the textual width of *addr*."""
    # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
    if len(addr) > 10:
        return 'x86_64'
    else:
        return 'i386'


class Symbolizer(object):
    """Base class of all symbolizers."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers).
        """
        return None


class LLVMSymbolizer(Symbolizer):
    """Symbolizer backed by a long-running llvm-symbolizer child process."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Start llvm-symbolizer.

        Args:
            symbolizer_path: executable to launch.
            default_arch: value passed as ``--default-arch``.
            system: OS name; ``--dsym-hint`` options are only added on
                'Darwin'.
            dsym_hints: iterable of .dSYM hints (optional).
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        # A fresh list per instance instead of a shared mutable default.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Launch the symbolizer; return the Popen object or None on failure."""
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=short',
               '--inlining=true',
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            for hint in self.dsym_hints:
                cmd.append('--dsym-hint=%s' % hint)
        if DEBUG:
            print(' '.join(cmd))
        try:
            # Text-mode pipes so that str can be written/read on Python 3 too.
            result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      universal_newlines=True)
        except OSError:
            result = None
        return result

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            # The pipe may be buffered; make sure the query reaches the child.
            self.pipe.stdin.flush()
            while True:
                # Output is read as (function, location) line pairs; an empty
                # line ends the answer.
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Deliberately best-effort: any failure is reported as "no
            # result" so that the caller can fall back to another symbolizer.
            result = []
        if not result:
            result = None
        return result


def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer.

    The executable is taken from $LLVM_SYMBOLIZER_PATH, then
    $ASAN_SYMBOLIZER_PATH, and finally 'llvm-symbolizer' from PATH.
    """
    symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
    if not symbolizer_path:
        symbolizer_path = os.getenv('ASAN_SYMBOLIZER_PATH')
        if not symbolizer_path:
            # Assume llvm-symbolizer is in PATH.
            symbolizer_path = 'llvm-symbolizer'
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)


class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by an addr2line child process bound to one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()
        # Sent after every query; the '??' / '??:0' answer it provokes marks
        # the end of the (possibly multi-frame) output for the real query.
        self.output_terminator = -1

    def open_addr2line(self):
        """Launch addr2line (honouring ``binutils_prefix``) for self.binary."""
        addr2line_tool = 'addr2line'
        if binutils_prefix:
            addr2line_tool = binutils_prefix + addr2line_tool
        cmd = [addr2line_tool, '-fi']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        lines = []
        try:
            self.pipe.stdin.write('%s\n%s\n' % (offset,
                                                self.output_terminator))
            self.pipe.stdin.flush()
            is_first_frame = True
            while True:
                function_line = self.pipe.stdout.readline()
                file_line = self.pipe.stdout.readline()
                if not function_line or not file_line:
                    # EOF: the child is gone.  Without this check the loop
                    # would spin forever appending empty frames.
                    raise EOFError('addr2line closed its output')
                function_name = function_line.rstrip()
                file_name = file_line.rstrip()
                if is_first_frame:
                    is_first_frame = False
                elif function_name == '??':
                    assert file_name == '??:0'
                    break
                lines.append((function_name, file_name))
        except Exception:
            # Best-effort: report an unknown frame rather than failing.
            lines.append(('??', '??:0'))
        return ['%s in %s %s' % (addr, function, fix_filename(file_name))
                for (function, file_name) in lines]


class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output.  Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork *args* on a pty; optionally redirect its stderr to /dev/null."""
        # Local imports so that the script can start on Windows.
        import pty
        import termios
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            if close_stderr:
                dev_null = os.open('/dev/null', 0)
                os.dup2(dev_null, 2)
            os.execvp(args[0], args)
        else:
            # Disable echoing.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send *line* to the child and return its next output line."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Return the next output line of the child, right-stripped."""
        return self.r.readline().rstrip()


class DarwinSymbolizer(Symbolizer):
    """Symbolizer backed by an ``atos`` child process bound to one binary."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = guess_arch(addr)
        self.open_atos()

    def open_atos(self):
        """Launch atos for self.binary / self.arch."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip informational lines emitted before the actual answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line:  %s' % atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parenthesised argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        else:
            return ['%s in %s' % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order; ``None`` entries are skipped."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        for symbolizer in self.symbolizer_list:
            if symbolizer:
                result = symbolizer.symbolize(addr, binary, offset)
                if result:
                    return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add *symbolizer* as the last fallback of the chain."""
        self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for ``binary + $BREAKPAD_SUFFIX``.

    Returns None when the environment variable is unset/empty or the
    resulting file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None


def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform symbolizer for *system*, or None if there is none.

    'Darwin' uses atos; 'Linux' and 'FreeBSD' use addr2line.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    elif system in ('Linux', 'FreeBSD'):
        # FreeBSD is accepted by SymbolizationLoop, so it needs a fallback
        # symbolizer as well.
        return Addr2LineSymbolizer(binary)
    return None


class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers queries from a Breakpad text symbol file."""

    def __init__(self, filename):
        """Load and parse the symbol file *filename*."""
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # Close the file deterministically (and avoid the Python-2-only
        # file() builtin).
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []
        self.symbols = {}
        self.address_list = []
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Populate the file, symbol and address tables from *lines*."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Tolerate blank lines instead of raising IndexError.
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK'):
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A name already recorded for this address is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(record, 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return ``(symbol, filename, line)`` covering *addr*, or None."""
        # Index of the first recorded address strictly greater than addr; the
        # entry before it is the greatest recorded address <= addr.
        index = bisect.bisect_right(self.address_list, addr)
        if index == 0:
            return None
        key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        else:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            result = ['%s in %s %s:%d' % (
                addr, function_name, file_name, line_no)]
            # Diagnostic dump only; printing it unconditionally would mix a
            # list repr into the symbolized log on stdout.
            if DEBUG:
                print(result)
            return result
        else:
            return None


class SymbolizationLoop(object):
    """Reads the log line by line and symbolizes stack-trace lines."""

    def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
        """Set up per-platform line processing.

        Args:
            binary_name_filter: optional callable mapping the binary name
                found in the log to the name to symbolize.
            dsym_hint_producer: optional callable returning .dSYM hints for a
                binary (only consulted on Darwin).
        Raises:
            Exception: on a non-Windows system other than Linux, Darwin or
                FreeBSD.
        """
        if sys.platform == 'win32':
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which
            # works even in sandboxed processes. Nothing needs to be done
            # here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.binary_name_filter = binary_name_filter
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
                raise Exception('Unknown system')
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set()
            self.frame_no = 0
            self.process_line = self.process_line_posix

    def symbolize_address(self, addr, binary, offset):
        """Return the symbolized frames for *offset* inside *binary*.

        Raises:
            Exception: if the Breakpad/LLVM symbolizers fail and
                ``allow_system_symbolizer`` is false.
        """
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always
        # use a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        #  1. check whether we've seen this binary already; if so,
        #     use |llvm_symbolizers[binary]|, which has already loaded the
        #     debug info for this binary (might not be the case for
        #     |last_llvm_symbolizer|);
        #  2. otherwise check if we've seen all the hints for this binary
        #     already; if so, reuse |last_llvm_symbolizer| which has the full
        #     set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     .dSYM hints to it.
        if binary not in self.llvm_symbolizers:
            use_new_symbolizer = True
            if self.system == 'Darwin' and self.dsym_hint_producer:
                dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                use_new_symbolizer = bool(
                    dsym_hints_for_binary - self.dsym_hints)
                self.dsym_hints |= dsym_hints_for_binary
            if self.last_llvm_symbolizer and not use_new_symbolizer:
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
            else:
                self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                    self.system, guess_arch(addr), self.dsym_hints)
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary),
                 self.llvm_symbolizers[binary]])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            if not allow_system_symbolizer:
                raise Exception('Failed to launch or use llvm-symbolizer.')
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Format frames as numbered output lines.

        Returns the current input line unchanged when *symbolized_lines* is
        empty; otherwise one line per frame, numbering them with (and
        advancing) ``self.frame_no``.
        """
        if not symbolized_lines:
            return [self.current_line]
        else:
            result = []
            for symbolized_frame in symbolized_lines:
                # NOTE(review): the single leading space is reproduced from
                # the source as found; whitespace may have been collapsed
                # there -- confirm against the expected report format.
                result.append(' #%s %s' % (str(self.frame_no),
                                           symbolized_frame.rstrip()))
                self.frame_no += 1
            return result

    def process_logfile(self):
        """Process every line of the global ``logfile`` and print the result."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line_echo(self, line):
        """Return *line* unchanged apart from trailing whitespace."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize *line* if it is a stack frame, else echo it."""
        self.current_line = line.rstrip()
        #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        if DEBUG:
            print(line)
        _, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset)
        if not symbolized_line:
            if original_binary != binary:
                # The filtered name gave nothing: retry with the name exactly
                # as it appeared in the log.
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset)
        return self.get_symbolized_lines(symbolized_line)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='ASan symbolization script',
        epilog='Example of use:\n'
               'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
               '-s "$HOME/SymbolFiles" < asan.log')
    parser.add_argument('path_to_cut', nargs='*',
                        help='pattern to be cut from the result file path ')
    parser.add_argument('-d', '--demangle', action='store_true',
                        help='demangle function names')
    parser.add_argument('-s', metavar='SYSROOT',
                        help='set path to sysroot for sanitized binaries')
    parser.add_argument('-c', metavar='CROSS_COMPILE',
                        help='set prefix for binutils')
    parser.add_argument('-l', '--logfile', default=sys.stdin,
                        type=argparse.FileType('r'),
                        help='set log file name to parse, default is stdin')
    args = parser.parse_args()
    # These assignments run at module level, so they rebind the module
    # globals read by the functions above.
    if args.path_to_cut:
        fix_filename_patterns = args.path_to_cut
    if args.demangle:
        demangle = True
    if args.s:
        binary_name_filter = sysroot_path_filter
        sysroot_path = args.s
    if args.c:
        binutils_prefix = args.c
    if args.logfile:
        logfile = args.logfile
    else:
        logfile = sys.stdin
    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()