kcachegrind/converters/hotshot2calltree.in

0001 #!/usr/bin/env python
0002 # _*_ coding: latin1 _*_
0003
0004 #
0005 # SPDX-FileCopyrightText: 2003 WEB.DE, Karlsruhe
0006 # SPDX-FileContributor: Jörg Beyer <job@webde-ag.de>
0007 #
0008 # SPDX-License-Identifier: GPL-2.0-only
0009 #
0010 #
0011 # This script transforms the pstat output of the hotshot
0012 # python profiler into the input of kcachegrind.
0013 #
0014 # example usage:
# modify your python script to run this code:
0016 #
0017 # import hotshot
0018 # filename = "pythongrind.prof"
0019 # prof = hotshot.Profile(filename, lineevents=1)
0020 # prof.runcall(run) # assuming that "run" should be called.
0021 # prof.close()
0022 #
0023 # it will run the "run"-method under profiling and write
0024 # the results in a file, called "pythongrind.prof".
0025 #
0026 # then call this script:
0027 # hotshot2cachegrind -o <output> <input>
# or here:
# hotshot2cachegrind -o cachegrind.out.0 pythongrind.prof
0030 #
0031 # then call kcachegrind:
0032 # kcachegrind cachegrind.out.0
0033 #
0034 # TODO:
#  * there are problems with recursive calls (direct and indirect) - in
#    that case the costs are wrong.
#
#  * some functions are displayed with "?" as their name. Possibly these
#    are only the C/C++ extensions.
#
#  * function name mangling that takes the file names into account is still
#    missing; currently all __init__'s and all run's are hard to tell apart :-(
0043 #
# Version text shown by --version; only the second word is used there.
# NOTE(review): "${KCACHEGRIND_VERSION}" looks like a placeholder that the
# build system substitutes when this .in template is configured - confirm.
version = "Version ${KCACHEGRIND_VERSION}"
# Program name used in the --version text.
progname = "hotshot2cachegrind"
0046
0047 import os, sys
0048 from hotshot import stats,log
0049 import os.path
0050
# Maximum number of input files to convert; 0 (or less) means no limit.
# run_with_optparse() overwrites it from the --file-limit option.
file_limit=0

# Readable names for the hotshot log event codes. In this file they are
# only referenced from a commented-out debug line in convertProfFile().
what2text = {
    log.WHAT_ADD_INFO    : "ADD_INFO",
    log.WHAT_DEFINE_FUNC : "DEFINE_FUNC",
    log.WHAT_DEFINE_FILE : "DEFINE_FILE",
    log.WHAT_LINENO      : "LINENO",
    log.WHAT_EXIT        : "EXIT",
    log.WHAT_ENTER       : "ENTER"}

# a pseudo caller on the caller stack. This represents
# the Python interpreter that executes the given python
# code. It has the same (file, lineno, function) layout as the
# positions delivered by the hotshot log reader.
root_caller = ("PythonInterpreter",0,"execute")
0065
class CallStack:
    """A tiny stack of (position, cost) pairs, based on a python list.

    Besides the stack itself, the class counts how often every position
    is currently on the stack, so that recursion() can tell whether a
    function is active more than once (direct or indirect recursion).
    """

    def __init__(self):
        self.stack = []
        # position -> number of stack entries that currently hold it
        self.recursion_counter = {}

    def push(self, elem):
        """put a (position, cost) pair on the stack"""
        self.stack.append(elem)
        # Count by position only: recursion() is queried with a bare
        # position, and the cost part differs between nested calls of the
        # same function, so the whole pair would never match.
        pos = elem[0]
        self.recursion_counter[pos] = self.recursion_counter.get(pos, 0) + 1

    def pop(self):
        """get the head element of the stack and remove it from the stack

        raises IndexError if the stack is empty.
        """
        elem = self.stack.pop()
        pos = elem[0]
        remaining = self.recursion_counter.get(pos, 0) - 1
        if remaining > 0:
            self.recursion_counter[pos] = remaining
        elif pos in self.recursion_counter:
            del self.recursion_counter[pos]
        return elem

    def top(self):
        """get the head element of the stack, stack is unchanged."""
        return self.stack[-1]

    def handleLineCost(self, tdelta):
        """add tdelta to the cost of the head element.

        The position of the head element stays the same, so the
        recursion counter does not need to be touched.
        """
        pos, cost = self.stack[-1]
        self.stack[-1] = (pos, cost + tdelta)

    def size(self):
        """return how many elements the stack has"""
        return len(self.stack)

    def __str__(self):
        return "[stack: %s]" % self.stack

    def recursion(self, pos):
        """return how many stack entries currently hold the position pos"""
        return self.recursion_counter.get(pos, 0)
0103
def return_from_call(caller_stack, call_dict, cost_now):
    """leave the function on top of the caller stack.

       The finished function is removed from the stack and the call is
       booked in call_dict, which maps
       called file -> called function -> caller position -> (cost, calls).
       The call counter is always incremented; the elapsed cost
       (cost_now minus the cost at enter) is only added when the stack
       reports no recursion for the finished function.
    """
    callee_pos, entered_at = caller_stack.pop()
    caller_pos, _caller_cost = caller_stack.top()

    callers = call_dict.setdefault(callee_pos[0], {}).setdefault(callee_pos[2], {})
    booked_cost, booked_calls = callers.get(caller_pos, (0, 0))

    if not caller_stack.recursion(callee_pos):
        booked_cost = booked_cost + cost_now - entered_at
    callers[caller_pos] = (booked_cost, booked_calls + 1)
0125
0126
def updateStatus(filecount):
    """show the number of the file being read on stdout.

       The text ends with a carriage return instead of a newline, so
       successive calls overwrite each other on a terminal.
    """
    stream = sys.stdout
    message = "reading File #%d    \r" % filecount
    stream.write(message)
    stream.flush()
def convertProfFiles(output, inputfilenames):
    """convert all the given input files into one kcachegrind
       input file.

       output is an object with a write() method that receives the
       result, inputfilenames is a sequence of paths; every path may be
       a profile file or a directory (see convertHandleFilename).
       Progress and the total cost are reported on stdout.
    """
    call_dict = {}
    cost_per_pos = {}
    cost_per_function = {}
    caller_stack = CallStack()
    # the pseudo caller stays at the bottom of the stack for all files
    caller_stack.push((root_caller, 0))

    total_cost = 0
    filecount = 1
    for inputfilename in inputfilenames:
        updateStatus(filecount)
        cost, filecount = convertHandleFilename(inputfilename, caller_stack, call_dict, cost_per_pos, cost_per_function, filecount)
        total_cost += cost
        if (file_limit > 0) and (filecount > file_limit):
            break

    # finish the status line, which ends with "\r" only
    sys.stdout.write("\n")
    # the format string has to be applied with "%"; passing the value as a
    # second print argument would emit the placeholder literally
    sys.stdout.write("total_cost: %d Ticks\n" % total_cost)
    dumpResults(output, call_dict, total_cost, cost_per_pos, cost_per_function)
0153
def convertHandleFilename(inputfilename, caller_stack, call_dict, cost_per_pos, cost_per_function, filecount):
    """convert one input path, which may be a profile file or a directory.

       Directories are handed to convertProfDir, plain files to
       convertProfFile; anything else is skipped with a warning on stderr.
       Nothing is converted when the file limit is already exceeded.

       returns the tuple (cost, filecount): the cost found below this
       path and the updated file counter.
    """
    updateStatus(filecount)
    # default result, so that the return below is also valid when the
    # limit is exceeded or the path is skipped
    cost = 0
    if (file_limit > 0) and (filecount > file_limit):
        return (cost, filecount)

    if os.path.isdir(inputfilename):
        cost, filecount = convertProfDir(inputfilename, caller_stack, call_dict, cost_per_pos, cost_per_function, filecount)
    elif os.path.isfile(inputfilename):
        cost = convertProfFile(inputfilename, caller_stack, call_dict, cost_per_pos, cost_per_function)
        filecount += 1
    else:
        sys.stderr.write("warn: ignoring '%s', is no file and no directory\n" % inputfilename)
    return (cost, filecount)
0166
def convertProfDir(start, caller_stack, call_dict, cost_per_pos, cost_per_function, filecount):
    """convert every entry of the directory start.

       The entries are processed in the order os.listdir reports them,
       until the file limit (if any) is exceeded.

       returns the tuple (cost, filecount): the summed cost of the
       entries and the updated file counter.
    """
    dir_cost = 0
    for entry in os.listdir(start):
        limit_exceeded = (file_limit > 0) and (filecount > file_limit)
        if limit_exceeded:
            break
        entry_path = os.path.join(start, entry)
        entry_cost, filecount = convertHandleFilename(entry_path, caller_stack, call_dict, cost_per_pos, cost_per_function, filecount)
        dir_cost += entry_cost
    return (dir_cost, filecount)
0177
def handleCostPerPos(cost_per_pos, pos, current_cost):
    """
       add current_cost to the cost booked for the source position pos.

       cost_per_pos is a dict of dicts of dicts:
       filename -> function name -> line number -> summed cost.
       pos is indexed as (filename, line number, function name).
       Missing levels are created on demand.
    """
    filename, funcname = pos[0], pos[2]
    lineno = pos[1]
    per_line = cost_per_pos.setdefault(filename, {}).setdefault(funcname, {})
    per_line[lineno] = per_line.get(lineno, 0) + current_cost
0196
def convertProfFile(inputfilename, caller_stack, call_dict, cost_per_pos, cost_per_function):
    """convert a single input file into one kcachegrind
       data.

       Every log item is a tuple (what, pos, tdelta). tdelta is booked on
       the source position pos and added to the running total; ENTER
       items push the position on caller_stack, EXIT items book the
       finished call in call_dict. cost_per_function is not used here.

       An unreadable or truncated file is reported on stdout and
       otherwise ignored. returns the total cost read from this file.

       this is the most expensive function in this python source :-)
    """
    total_cost = 0
    hc = handleCostPerPos # shortcut
    try:
        logreader = log.LogReader(inputfilename)
        for what, pos, tdelta in logreader:
            # most common cases first
            if what == log.WHAT_LINENO:
                # add the current cost to the current function
                hc(cost_per_pos, pos, tdelta)
                total_cost += tdelta
            elif what == log.WHAT_ENTER:
                # remember the running total at enter time; the cost of
                # the call is the difference at exit time
                caller_stack.push((pos, total_cost))
                hc(cost_per_pos, pos, tdelta)
                total_cost += tdelta
            elif what == log.WHAT_EXIT:
                hc(cost_per_pos, pos, tdelta)
                total_cost += tdelta
                return_from_call(caller_stack, call_dict, total_cost)
            else:
                # raised explicitly, an assert statement would vanish
                # under "python -O"
                raise AssertionError("duh: %d" % what)
    except IOError:
        print("could not open inputfile '%s', ignore this." % inputfilename)
    except EOFError:
        # sys.exc_info() instead of a bound name keeps this line valid
        # for every python version
        print("EOF: %s" % (sys.exc_info()[1],))

    # It is unknown why the stack is sometimes not empty at the end of a
    # file - it has to be rewound to get 100% for the root_caller. This
    # is also done after a read error, so that frames of a truncated
    # file do not leak into the next input file.
    while caller_stack.size() > 1:
        return_from_call(caller_stack, call_dict, total_cost)

    return total_cost
0241
def pretty_name(file, function):
    """return the name under which function is displayed.

       Only the function name is used at the moment; the file argument
       is ignored. Variants that combine both, like "file::function" or
       "function_[file]", have been tried and are disabled.
    """
    display_name = "%s" % function
    return display_name
0247     #return "%s_%s" % (pfile, function)
0248
class TagWriter:
    """writes "tag=value" lines to an output file.

       last_values can remember the last value per tag, but write()
       currently emits every line unconditionally and neither reads nor
       updates it (the suppression of repeated values is disabled).
    """

    def __init__(self, output):
        self.output = output
        # tag -> remembered value, see the class docstring
        self.last_values = {}

    def clearTag(self, tag):
        """forget the remembered value of tag; unknown tags are ignored"""
        # "in" instead of dict.has_key(), which newer pythons do not have
        if tag in self.last_values:
            del self.last_values[tag]

    def clear(self):
        """forget all remembered values"""
        self.last_values = {}

    def write(self, tag, value):
        """write the line "tag=value" to the output"""
        self.output.write("%s=%s\n" % (tag, value))
0265
def dumpResults(output, call_dict, total_cost, cost_per_pos, cost_per_function):
    """write the collected results in the format kcachegrind
       could read.

       The output has three parts: a short header, the cost per source
       line (from cost_per_pos) and the calls with their cost (from
       call_dict). cost_per_function is not used.
    """
    # header
    output.write("events: Tick\n")
    output.write("summary: %d\n" % total_cost)
    output.write("cmd: your python script\n")
    output.write("\n")
    tags = TagWriter(output)

    # part 1: the cost of every line, grouped by file and function
    for src_file, funcs_in_file in cost_per_pos.items():
        for func_name, line_costs in funcs_in_file.items():
            tags.write("ob", src_file)
            tags.write("fn", func_name)
            tags.write("fl", src_file)
            for lineno in line_costs:
                output.write("%d %d\n" % (lineno, line_costs[lineno]))

    output.write("\n\n")

    # part 2: the function calls. One record is written per
    # (caller position, called function) pair with its cost.
    for called_file, callees in call_dict.items():
        for called_func, callers in callees.items():
            for caller_pos, call_info in callers.items():
                tags.write("ob", caller_pos[0])
                tags.write("fn", caller_pos[2])
                tags.write("fl", caller_pos[0])
                tags.write("cob", called_file)
                tags.write("cfn", called_func)
                tags.write("cfl", called_file)
                cost, count = call_info
                output.write("calls=%d\n%d %d\n" % (count, caller_pos[1], cost))
                # Forget all tag values after each call record. Open
                # question from the original author: is it a bug in
                # kcachegrind that the "cob=xxx" line has to be rewritten
                # after a calls entry with a cost line?
                tags.clear()
                output.write("\n")
0315
def run_without_optparse():
    """parse the options without optparse, use sys.argv

       The only accepted form is: -o outputfile in1 [in2 ...].
       Anything else prints the usage text and returns.
    """
    if len(sys.argv) < 4 or sys.argv[1] != "-o":
        print("usage: hotshot2cachegrind -o outputfile in1 [in2 [in3 [...]]]")
        return
    outputfilename = sys.argv[2]
    # only the open itself is guarded: an IOError raised during the
    # conversion has nothing to do with opening the output file
    try:
        output = open(outputfilename, "w")
    except IOError:
        print("could not open '%s' for writing." % outputfilename)
        return
    # close the output file even if the conversion fails
    try:
        convertProfFiles(output, sys.argv[3:])
    finally:
        output.close()
0329
def run_with_optparse():
    """parse the options with optparse

       Options: -o/--output FILE (default and "-": stdout) and
       --file-limit N (stop after N input files, 0: no limit). All
       remaining arguments are input files or directories.
       Sets the module level file_limit.
    """

    global file_limit

    versiontext = "%s version: %s" % ( progname, version.split()[1], )
    parser = OptionParser(version=versiontext)
    parser.add_option("-o", "--output",
      action="store", type="string", dest="outputfilename", metavar="FILE",
      help="write output into FILE")
    # type="int" lets optparse reject a non numeric limit with a usage error
    parser.add_option("--file-limit",
      action="store", type="int", dest="file_limit", default=0,
      help="stop after given number of input files")
    (options, args) = parser.parse_args()
    file_limit = options.file_limit

    output = sys.stdout
    close_output = 0
    if options.outputfilename and options.outputfilename != "-":
        try:
            output = open(options.outputfilename, "w")
            close_output = 1
        except IOError:
            print("could not open '%s' for writing." % options.outputfilename)
            # no output file: do not fall back to stdout, skip the conversion
            output = None

    if output is not None:
        # close an output file opened here even if the conversion fails;
        # stdout is never closed
        try:
            convertProfFiles(output, args)
        finally:
            if close_output:
                output.close()
0355         if close_output:
0356             output.close()
0357
0358
def profile_myself():
    """run this script under the hotshot profiler.

       The profile is written to "self.prof". If that file exists
       already, the script runs without profiling instead.
    """
    import hotshot
    filename = "self.prof"
    if os.path.exists(filename):
        print("not profiling myself, since '%s' exists, running normal" % filename)
        run()
        return
    profiler = hotshot.Profile(filename, lineevents=1)
    profiler.runcall(run)
    profiler.close()
0369
# choose the entry point: use optparse if this python has it, otherwise
# fall back to the minimal sys.argv parser.
try:
    from optparse import OptionParser
except ImportError:
    run = run_without_optparse
else:
    run = run_with_optparse

if __name__ == "__main__":
    # call profile_myself() instead of run() to profile this script
    try:
        run()
    except KeyboardInterrupt:
        # Ctrl-C: leave with exit status 1 and without a traceback
        raise SystemExit(1)