File indexing completed on 2024-04-28 16:44:10
# SPDX-License-Identifier: GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
# SPDX-FileCopyrightText: 2021-2022 Harald Sitter <sitter@kde.org>

from typing import Mapping
import gdb
from gdb.FrameDecorator import FrameDecorator

from datetime import datetime
import uuid
import os
import json
import subprocess
import signal
import re
import binascii
import platform
import multiprocessing
from pathlib import Path

if os.getenv('DRKONQI_WITH_SENTRY'):
    # Initialize sentry reports for exceptions in this script
    try:
        import sentry_sdk
        sentry_sdk.init(
            dsn="https://d6d53bb0121041dd97f59e29051a1781@errors-eval.kde.org/13",
            traces_sample_rate=1.0,
            # Fall back to 'unknown' so an unset version cannot raise a TypeError here.
            release="drkonqi@" + os.getenv('DRKONQI_VERSION', 'unknown'),
            ignore_errors=[KeyboardInterrupt],
        )
    except ImportError:
        print("python sentry-sdk not installed :(")

    # NB: pop() rather than del. When both modules are missing the variable has
    # already been removed by the first handler and del would raise a KeyError.
    try:
        import distro
    except ImportError:
        print("python distro module missing, disabling sentry")
        os.environ.pop('DRKONQI_WITH_SENTRY', None)

    try:
        import psutil
    except ImportError:
        print("python psutil module missing, disabling sentry")
        os.environ.pop('DRKONQI_WITH_SENTRY', None)


def mangle_path(path):
    """Replace the first occurrence of the user's home directory in path with "$HOME".

    Falsy input (None, empty string) is returned unchanged.
    """
    if not path:
        return path
    # Literal replacement: the home directory is a path, not a regular expression.
    return path.replace(str(Path.home()), "$HOME", 1)


class SentryQMLThread:
    """Collects a QML stack trace and renders it as a pseudo thread for sentry."""

    def __init__(self):
        """Walk all threads of the selected inferior looking for a QML trace.

        Switches the selected thread as a side effect. The payload of the last
        thread that yields a trace is kept in self.payload (None if there is none).
        """
        self.payload = None
        # should we iterate the inferiors? Probably makes no diff for 99% of apps.
        for thread in gdb.selected_inferior().threads():
            if not thread.is_valid():
                continue
            thread.switch()
            if gdb.selected_thread() != thread:
                continue  # failed to switch :shrug:

            try:
                frame = gdb.newest_frame()
            except gdb.error:
                frame = None  # nothing to walk for this thread
            while frame:
                ret = qml_trace_frame(frame)
                if ret:
                    self.payload = ret
                    break
                try:
                    frame = frame.older()
                except gdb.error:
                    # cannot unwind any further; stop instead of retrying the same frame forever
                    frame = None

    def to_sentry_frame(self, frame):
        """Convert one parsed QML frame (a mapping with level/func/file/line) to a sentry frame dict."""
        print("level={level} func={func} at={file}:{line}".format(**frame) )
        return {
            'platform': 'other',  # always different from the cpp/native frames. alas, technically this frame isn't a javascript frame
            'filename': mangle_path(frame['file']),
            'function': frame['func'],
            'lineno': int(frame['line']),
            'in_app': True  # qml is always in the app I should think
        }

    def to_sentry_frames(self, frames):
        """Convert an iterable of parsed QML frames, dropping any that convert to nothing."""
        lst = []
        for frame in frames:
            data = self.to_sentry_frame(frame)
            if not data:
                continue
            lst.append(data)
        return lst

    def to_dict(self):
        """Return the sentry thread dict for the QML trace, or None when there is no trace."""
        if not self.payload:
            return None

        payload = self.payload

        from pygdbmi import gdbmiparser
        result = gdbmiparser.parse_response("*stopped," + payload)
        frames = result['payload']['frame']
        print(frames)
        if isinstance(frames, dict):  # single frames traces aren't arrays to make it more fun -.-
            frames = [frames]
        lst = self.to_sentry_frames(frames)
        print(lst)
        if lst:
            return {
                'id': 'QML',  # docs say this is typically a number but there is indeed no enforcement it seems
                'name': 'QML',
                'crashed': True,
                'stacktrace': {
                    'frames': lst  # reuse the converted frames rather than converting them a second time
                }
            }
        return None

    def to_list(self):
        """Return [to_dict()] when there is a trace, otherwise an empty list."""
        data = self.to_dict()
        if data:
            return [data]
        return []


# Only grabbing the most local block, technically we could also gather up encompassing scopes but it may be a bit much.
class SentryVariables:
    """Renders the symbols of a frame's innermost block as a name -> value string mapping."""

    def __init__(self, frame):
        self.frame = frame

    def block(self):
        """Return the frame's block or None when gdb cannot provide one."""
        try:
            return self.frame.block()
        except Exception:
            return None

    def to_dict(self):
        """Return {symbol name: stringified value}; symbols that cannot be read are skipped."""
        ret = {}
        block = self.block()
        if not block:
            return ret

        for symbol in block:
            try:
                ret[str(symbol)] = str(symbol.value(self.frame))
            except Exception:
                pass  # either not a variable or not stringable
        return ret


class SentryFrame:
    """Wraps a gdb frame and renders it as a sentry stack frame."""

    def __init__(self, gdb_frame):
        self.frame = gdb_frame
        self.sal = gdb_frame.find_sal()

    def type(self):
        return self.frame.type()

    def filename(self):
        return self.sal.symtab.fullname() if (self.sal and self.sal.symtab) else None

    def lineNumber(self):
        if not self.sal.line:
            return None
        if self.sal.line < 0:
            return None
        # NOTE "The line number of the call, starting at 1." - I'm almost sure gdb starts at 0, so add 1
        # NOTE(review): gdb appears to report 1-based lines (0 meaning unknown, which is filtered above),
        # in which case this is off by one - confirm before changing.
        return self.sal.line + 1

    def function(self):
        """Return the function name as a string, or None when it is unknown."""
        name = self.frame.name()
        if name:
            return name
        # Fall back to the symbol, but hand out its name: the symbol object itself
        # is neither JSON serializable nor usable with str methods by callers.
        symbol = self.frame.function()
        return symbol.name if symbol else None

    def package(self):
        name = gdb.solib_name(self.frame.pc())
        if not name:
            return name
        # NOTE: realpath because neon's gdb is confused over UsrMerge symlinking of /lib to /usr/lib messing up
        # path consistency (mapping data and by extension SentryImage instances use the real path already though)
        return os.path.realpath(name)

    def address(self):
        return ('0x%x' % self.frame.pc())

    def to_dict(self):
        return {
            'filename': mangle_path(self.filename()),
            'function': self.function(),
            'package': self.package(),
            'instruction_addr': self.address(),
            'lineno': self.lineNumber(),
            'vars': SentryVariables(self.frame).to_dict()
        }


class SentryRegisters:
    """Renders the registers of a frame as a name -> hex string mapping."""

    def __init__(self, gdb_frame):
        self.frame = gdb_frame

    def to_dict(self):
        """Return {register name: hex string}, or None when this gdb cannot enumerate registers."""
        js = {}
        try:  # registers() is only available in somewhat new gdbs. (e.g. not ubuntu 20.04)
            for register in self.frame.architecture().registers():
                # registers() yields descriptor objects, the name lives in .name. Calling startswith() on
                # the descriptor itself raised AttributeError, which the compat handler below swallowed,
                # so no registers were ever reported.
                if register.name.startswith('ymm'):  # ymm actually contains stuff sentry cannot handle. alas :(
                    continue
                value = self.frame.read_register(register).format_string(format='x')
                if value:  # may be empty if the value cannot be expressed as hex (happens for extra gdb register magic - 'ymm0' etc)
                    js[register.name] = value
                else:
                    js[register.name] = "0x0"
        except AttributeError:
            return None
        return js


class SentryTrace:
    """Renders the stack of a thread as a sentry stacktrace."""

    def __init__(self, thread):
        # NB: changes the selected thread!
        thread.switch()
        self.frame = gdb.newest_frame()

    def to_dict(self):
        frames = [ SentryFrame(frame) for frame in gdb.FrameIterator.FrameIterator(self.frame) ]

        # throw away kcrash or sigtrap frame, and above. they are useless noise
        kcrash_index = -1
        trap_index = -1
        for index, frame in enumerate(frames):
            function = frame.function()
            if function and function.startswith('KCrash::defaultCrashHandler'):
                kcrash_index = index
            if frame.type() == gdb.SIGTRAMP_FRAME:
                trap_index = index
        clip_index = max(kcrash_index, trap_index)
        if clip_index > -1:
            frames = frames[(clip_index + 1):]

        # Sentry format oddly wants oldest frame first. TODO
        frames.reverse()
        return { 'frames': [ frame.to_dict() for frame in frames ], 'registers': SentryRegisters(self.frame).to_dict() }


class SentryThread:
    """Renders a gdb thread as a sentry thread."""

    def __init__(self, gdb_thread, is_crashed):
        self.thread = gdb_thread
        self.is_crashed = is_crashed

    def to_dict(self):
        # https://develop.sentry.dev/sdk/event-payloads/threads/
        # As per Sentry policy, the thread that crashed with an exception should not have a stack trace,
        # but instead, the thread_id attribute should be set on the exception and Sentry will connect the two.
        return {
            'id': self.thread.ptid[1],
            'name': self.thread.name,
            'crashed': self.is_crashed,
            'stacktrace': SentryTrace(self.thread).to_dict()
        }


class SentryImage:
    """Describes one mapped file for sentry's debug_meta. Only usable when self.valid is True."""

    # NOTE: realpath hacks because neon's gdb is confused over UsrMerge symlinking of /lib to /usr/lib messing up
    # path consistency so always force realpathing for our purposes (this also is applied in SentryFrame)
    _objfiles = {}

    def objfiles(self):
        """Return a class-wide cached mapping of file name (as reported and realpath'd) to gdb objfile."""
        if SentryImage._objfiles:
            return SentryImage._objfiles

        objfiles = {}
        for objfile in gdb.objfiles():
            if not objfile.filename:  # nothing to key on, and realpath would choke on None
                continue
            objfiles[objfile.filename] = objfile
            objfiles[os.path.realpath(objfile.filename)] = objfile
        SentryImage._objfiles = objfiles
        return objfiles

    # This can throw if objfiles fail to resolve!
    def __init__(self, file, start, end):
        # Awkwardly gdb python doesn't really give access to the solibs, meanwhile
        # the CLI doesn't really give access to the build_id. So we need to tuck
        # the two together to get comprehensive data on the loaded images.
        self.valid = False
        self.file = os.path.realpath(file)
        self.image_start = start
        self.image_end = end
        # Required! We can't build a debug_id without it and we require a debug_id!
        try:
            # If the mapped file isn't actually a library it will not be in the objfile rendering the image moot.
            # This happens because we need to construct off of proc mapping data. This also includes /dev nodes,
            # cache files and the like. The easiest way to filter them out is to check if the file is in the objfiles.
            self.objfile = self.objfiles()[self.file]
        except KeyError:
            if self.file.endswith(".so"):
                raise Exception("unexpected mapping fail {} {}".format(self.file, self.objfiles()))
            return
        if not self.objfile.build_id:
            # Without a build id debug_id() cannot be computed; treat the image as
            # unusable rather than failing later while serializing it.
            return
        self.valid = True

    def debug_id(self):
        # Identifier of the dynamic library or executable.
        # It is the value of the build_id custom section and must be formatted
        # as UUID truncated to the leading 16 bytes.
        build_id = self.build_id()
        truncate_bytes = 16
        # pad with zeros so build ids shorter than 16 bytes still yield 16 bytes
        build_id = build_id + ("00" * truncate_bytes)
        return str(uuid.UUID(bytes_le=binascii.unhexlify(build_id)[:truncate_bytes]))

    def build_id(self):
        return self.objfile.build_id

    def to_dict(self):
        """Return the sentry image dict, or None for an invalid image."""
        if not self.valid:
            return None
        # https://develop.sentry.dev/sdk/event-payloads/debugmeta

        return {
            'type': 'elf',
            'image_addr': hex(self.image_start),
            'image_size': (self.image_end - self.image_start),
            'debug_id': self.debug_id(),
            # 'debug_file': None, # technically could get this from objfiles somehow but probably not useful cause it can't be used for anything
            'code_id': self.build_id(),
            'code_file': self.file,
            # 'image_vmaddr': None, # not available we'd have to read the ELF I think
            'arch': platform.machine(),
        }


def get_stdout(proc):
    """Run the command given as argument list and return its stripped stdout.

    Returns an empty string when the command cannot be started or exits non-zero.
    """
    try:
        result = subprocess.run(proc, stdout=subprocess.PIPE)
    except OSError:  # e.g. the executable is not installed
        return ''
    if result.returncode != 0:
        return ''
    return result.stdout.decode("utf-8").strip()


class SentryImages:
    """Collects the mapped files of the inferior from `info proc mappings`."""

    _mapping_re = re.compile(
        r"""(?x)

        \s*

        (?P<start>
            0[xX][a-fA-F0-9]+
        )

        \s+

        (?P<end>
            0[xX][a-fA-F0-9]+
        )

        \s+

        (?P<size>
            0[xX][a-fA-F0-9]+
        )

        \s+

        (?P<offset>
            0[xX][a-fA-F0-9]+
        )

        \s+

        (
            (?P<permissions>
                [rwxps-]+)
            \s+
        )?

        (?P<file>
            [\/|\/][\w|\S]+|\S+\.\S+|[a-zA-Z]*
        )
        """
    )

    def __init__(self):
        # NB: gdb also has `info sharedlibrary` but that refers to section addresses inside the image. this would mess
        # up symbolication as we need the correct image start in the memory region. The only way to get that is through
        # proc mappings.
        # Always defined so to_list() keeps working when the mappings cannot be obtained.
        self.mappings = {}
        mapping = {}
        try:
            output = gdb.execute('info proc mappings', to_string=True)
        except Exception:
            return
        for line in output.splitlines():
            match = SentryImages._mapping_re.match(line)
            if not match:
                continue
            start = int(match.group('start'), 0)
            end = int(match.group('end'), 0)
            # we'll calculate size ourselves; the match is not used
            # offset basically just skips over previous sections so we don't really care
            file = match.group('file')
            if file not in mapping:
                mapping[file] = {'start': start, 'end': end}
                continue
            # a file is usually mapped in multiple sections; track the overall extent
            mapping[file]['start'] = min(mapping[file]['start'], start)
            mapping[file]['end'] = max(mapping[file]['end'], end)

        # TODO: if the regexing fails we could fall back to reading /proc/1/maps instead, I'd rather have more code than useless traces because of missing images
        self.mappings = mapping

    def to_list(self):
        """Return the sentry image dicts of all valid images."""
        ret = []
        for file, mapping in self.mappings.items():
            image = SentryImage(file=file, start=mapping['start'], end=mapping['end'])
            if not image.valid:  # images are invalid if the file wasn't actually found in the gdb.objfiles
                continue
            ret.append(image.to_dict())
        return ret


class SentryEvent:
    """Builds the sentry event payload for the crash."""

    def make(self, program, crash_thread):
        """Return the sentry event dict for program, with crash_thread marked as the crashed thread.

        Reads DRKONQI_SIGNAL, DRKONQI_VERSION and DRKONQI_APP_VERSION from the environment.
        Changes the selected thread as a side effect of collecting the traces.
        """
        crash_signal = int(os.getenv('DRKONQI_SIGNAL'))
        vm = psutil.virtual_memory()
        boot_time = datetime.utcfromtimestamp(psutil.boot_time()).strftime('%Y-%m-%dT%H:%M:%S')

        # crutch to get the build id. if we did this outside gdb I expect it'd be neater
        progfile = gdb.current_progspace().filename
        build_id = gdb.lookup_objfile(progfile).build_id

        # distro's keys excitingly aren't the actual os-release capitalization. #fun.
        # distro's #build_number doesn't use this internally, so we manually need to obtain it. ugh.
        distro_build_id = distro.os_release_attr('build_id')
        if not distro_build_id:
            distro_build_id = distro.os_release_attr('variant_id')

        # NOTE(review): this queries the --system bus while the 'simulator' entry below queries --session.
        # One of the two is presumably unintended - confirm.
        print(get_stdout(['qdbus', '--system', 'org.freedesktop.systemd1', '/org/freedesktop/systemd1', 'org.freedesktop.systemd1.Manager.Virtualization']))
        sentry_event = {  # https://develop.sentry.dev/sdk/event-payloads/
            "debug_meta": {  # https://develop.sentry.dev/sdk/event-payloads/debugmeta/
                "images": SentryImages().to_list()
            },
            'threads': [  # https://develop.sentry.dev/sdk/event-payloads/threads/
                SentryThread(thread, is_crashed=(thread == crash_thread)).to_dict() for thread in gdb.selected_inferior().threads()
            ]  # + SentryQMLThread().to_list(), TODO make qml more efficient it iterates everything again after the sentry threads were collected. a right waste of time!
            ,
            'event_id': uuid.uuid4().hex,
            'timestamp': datetime.utcnow().isoformat(),
            'message': 'Signal {} in {}'.format(crash_signal, program),
            'platform': 'native',
            'sdk': {
                'name': 'kde.drkonqi.gdb',
                'version': os.getenv('DRKONQI_VERSION'),
            },
            'level': 'fatal',
            # FIXME this is kind of wrong, program ought to be mapped to the project name via our DSNs mapping table (see reportinterface.cpp)
            'release': "{}@unknown".format(program),
            'dist': build_id,
            'tags': {
                'binary': program  # for fallthrough we still need a convenient way to identify things
            },
            # TODO environment entry (could be staging for beta releases?)
            'contexts': {  # https://develop.sentry.dev/sdk/event-payloads/contexts/
                'device': {
                    'name': get_stdout(['qdbus', '--system', 'org.freedesktop.hostname1', '/org/freedesktop/hostname1', 'org.freedesktop.hostname1.Hostname']),
                    'family': get_stdout(['qdbus', '--system', 'org.freedesktop.hostname1', '/org/freedesktop/hostname1', 'org.freedesktop.hostname1.Chassis']),
                    'simulator': (get_stdout(['qdbus', '--session', 'org.freedesktop.systemd1', '/org/freedesktop/systemd1', 'org.freedesktop.systemd1.Manager.Virtualization']) != ""),
                    'arch': platform.machine(),
                    'memory_size': vm.total,
                    'free_memory': vm.available,
                    'boot_time': boot_time,
                    'timezone': get_stdout(['qdbus', '--system', 'org.freedesktop.timedate1', '/org/freedesktop/timedate1', 'org.freedesktop.timedate1.Timezone']),
                    'processor_count': multiprocessing.cpu_count()
                },
                'os': {
                    'name': distro.name(),  # unfortunately I don't think we can supply icons :( we could use linux as name but that sucks too
                    'version': distro.version(),
                    'build': distro_build_id,
                    'kernel_version': os.uname().release,
                    'raw_description': get_stdout(['uname', '-a'])
                }
            },
            'exception': {  # https://develop.sentry.dev/sdk/event-payloads/exception/
                'values': [
                    {
                        'value': signal.strsignal(crash_signal),
                        'thread_id': crash_thread.ptid[1],
                        'mechanism': {
                            'type': 'drkonqi',
                            'handled': False,
                            "synthetic": True,  # Docs: This flag should be set for all "segfaults"
                            'meta': {
                                'signal': {
                                    'number': crash_signal,
                                    'name': signal.strsignal(crash_signal)
                                },
                            },
                        },
                        'stacktrace': SentryTrace(crash_thread).to_dict(),
                    }
                ]
            }
        }

        if os.getenv('DRKONQI_APP_VERSION'):
            sentry_event['release'] = '{}@{}'.format(program, os.getenv('DRKONQI_APP_VERSION'))

        return sentry_event


def qml_trace_frame(frame):
    """Try to obtain a QML stack trace through a QV4::ExecutionEngine pointer visible in frame.

    Returns the massaged trace string, or None when the frame's block holds no
    usable engine pointer or none of the trace functions could be evaluated.
    """
    # NB: Super inspired by QtCreator's gdbbridge.py (GPL3).
    # I've made the code less of an eye sore though.

    # This is a very exhaustive attempt at finding a frame that has a symbol to the
    # QV4::ExecutionEngine as we need its address to get the QML trace via qt_v4StackTraceForEngine.
    # Unfortunately there's no shorter way of accomplishing this since the engine isn't necessarily
    # appearing as a frame (consequently we can't easily get to a this pointer).

    try:
        block = frame.block()
    except Exception:
        block = None

    if not block:
        return None

    for symbol in block:
        if not symbol.is_variable and not symbol.is_argument:
            continue

        value = symbol.value(frame)
        if value.is_optimized_out:  # can't read values that have been optimized out
            continue

        typeobj = value.type
        if typeobj.code != gdb.TYPE_CODE_PTR:
            continue

        dereferenced_type = typeobj.target().unqualified()
        if dereferenced_type.name != 'QV4::ExecutionEngine':
            continue

        addr = int(value)
        methods = [
            'qt_v4StackTraceForEngine((void*)0x{0:x})',
            'qt_v4StackTrace(((QV4::ExecutionEngine *)0x{0:x})->currentContext())',
            'qt_v4StackTrace(((QV4::ExecutionEngine *)0x{0:x})->currentContext)',
        ]
        for method in methods:
            try:  # throws when the function is invalid
                result = str(gdb.parse_and_eval(method.format(addr)))
            except Exception:
                continue
            if result:
                # We need to massage the result a bit. It's of the form
                # "$addr stack=[...."
                # but we want to drop the addr as it's not useful data and can't get parsed.
                # Also drop the stack nesting. Serves no purpose for us. Also unescape the quotes.
                pos = result.find('"stack=[')
                if pos != -1:
                    result = result[pos + 8:-2]
                    result = result.replace('\\\"', '\"')
                return result

    return None


def print_qml_frame(frame):
    """Print one parsed QML frame (a mapping with level/func/file/line)."""
    print("level={level} func={func} at={file}:{line}".format(**frame) )


def print_qml_frames(payload):
    """Pretty print the QML trace payload, falling back to printing it verbatim."""
    try:  # try pretty printing via pygdbmi. If it is not available print verbatim.
        from pygdbmi import gdbmiparser
        response = gdbmiparser.parse_response("*stopped," + payload)
        frames = response['payload']['frame']
        if isinstance(frames, dict):  # single frames traces aren't arrays to make it more fun -.-
            print_qml_frame(frames)
        else:  # presumably an iterable
            for frame in frames:
                print_qml_frame(frame)
    except Exception as e:
        print("Failed to do pygdbmi parsing: {}".format(str(e)))
        print(payload)


def print_qml_trace():
    """Print the QML trace of every thread that has one. Changes the selected thread!"""
    # should we iterate the inferiors? Probably makes no diff for 99% of apps.
    for thread in gdb.selected_inferior().threads():
        if not thread.is_valid():
            continue
        thread.switch()
        if gdb.selected_thread() != thread:
            continue  # failed to switch :shrug:

        try:
            frame = gdb.newest_frame()
        except gdb.error:
            frame = None  # nothing to walk for this thread
        while frame:
            ret = qml_trace_frame(frame)
            if ret:
                header = "____drkonqi_qmltrace_thread:{}____".format(str(thread.num))
                print(frame)
                print(header)
                print_qml_frames(ret)
                print('-' * len(header))
                print("(beware that frames may have been optimized out)")
                print()  # separator newline
                break  # next thread (there should only be one engine per thread I think?)
            try:
                frame = frame.older()
            except gdb.error:
                # cannot unwind any further; stop instead of retrying the same frame forever
                frame = None


def print_kcrash_error_message():
    """Print the content of the s_kcrashErrorMessage static symbol when it is available."""
    symbol = gdb.lookup_static_symbol("s_kcrashErrorMessage")
    if not symbol or not symbol.is_valid():
        return

    try:
        value = symbol.value()
    except Exception:  # docs say value can throw!
        return
    print("KCRASH_INFO_MESSAGE: Content of s_kcrashErrorMessage: " + value.format_string())
    print()  # separator newline


def print_sentry_payload(thread):
    """Build the sentry event for thread and write it to $DRKONQI_TMP_DIR/sentry_payload.json.

    The event is built regardless; it is only written when DRKONQI_TMP_DIR is set.
    """
    program = os.path.basename(gdb.current_progspace().filename)
    payload = SentryEvent().make(program, thread)

    tmpdir = os.getenv('DRKONQI_TMP_DIR')
    if tmpdir:
        with open(os.path.join(tmpdir, 'sentry_payload.json'), mode='w') as tmpfile:
            json.dump(payload, tmpfile)


def print_preamble():
    """Print the kcrash message and QML traces, then write the sentry payload if enabled."""
    thread = gdb.selected_thread()
    # run this first as it expects the current frame to be the crashing one and qml tracing changes the frames around
    print_kcrash_error_message()
    # changes current frame and thread!
    print_qml_trace()
    # prints sentry report
    if os.getenv('DRKONQI_WITH_SENTRY'):
        print_sentry_payload(thread)