Patchwork [06,of,10,V2] statprof: pass data structure to display functions

login
register
mail settings
Submitter Gregory Szorc
Date Aug. 17, 2016, 4:03 p.m.
Message ID <7409c675a2be190605f0.1471449824@ubuntu-vm-main>
Download mbox | patch
Permalink /patch/16332/
State Changes Requested
Headers show

Comments

Gregory Szorc - Aug. 17, 2016, 4:03 p.m.
# HG changeset patch
# User Gregory Szorc <gregory.szorc@gmail.com>
# Date 1471449460 25200
#      Wed Aug 17 08:57:40 2016 -0700
# Node ID 7409c675a2be190605f07277627b095976f56da8
# Parent  207736e7490b561b050d345914ade2aeefc2482b
statprof: pass data structure to display functions

Currently, statprof maintains a global "state" variable that is used by
several functions. This prevents future modifications that would enable
collection on multiple threads.

We start weaning statprof off global state by changing the display
functions to take the object containing the data as an argument.

Patch

diff --git a/mercurial/statprof.py b/mercurial/statprof.py
--- a/mercurial/statprof.py
+++ b/mercurial/statprof.py
@@ -420,76 +420,77 @@  class SiteStats(object):
 class DisplayFormats:
     ByLine = 0
     ByMethod = 1
     AboutMethod = 2
     Hotpath = 3
     FlameGraph = 4
     Json = 5
 
-def display(fp=None, format=3, **kwargs):
+def display(fp=None, format=3, data=None, **kwargs):
     '''Print statistics, either to stdout or the given file object.'''
+    data = data or state
 
     if fp is None:
         import sys
         fp = sys.stdout
-    if len(state.samples) == 0:
+    if len(data.samples) == 0:
         print('No samples recorded.', file=fp)
         return
 
     if format == DisplayFormats.ByLine:
-        display_by_line(fp)
+        display_by_line(data, fp)
     elif format == DisplayFormats.ByMethod:
-        display_by_method(fp)
+        display_by_method(data, fp)
     elif format == DisplayFormats.AboutMethod:
-        display_about_method(fp, **kwargs)
+        display_about_method(data, fp, **kwargs)
     elif format == DisplayFormats.Hotpath:
-        display_hotpath(fp, **kwargs)
+        display_hotpath(data, fp, **kwargs)
     elif format == DisplayFormats.FlameGraph:
-        write_to_flame(fp)
+        write_to_flame(data, fp)
     elif format == DisplayFormats.Json:
-        write_to_json(fp)
+        write_to_json(data, fp)
     else:
         raise Exception("Invalid display format")
 
     if format != DisplayFormats.Json:
         print('---', file=fp)
-        print('Sample count: %d' % len(state.samples), file=fp)
-        print('Total time: %f seconds' % state.accumulated_time, file=fp)
+        print('Sample count: %d' % len(data.samples), file=fp)
+        print('Total time: %f seconds' % data.accumulated_time, file=fp)
 
-def display_by_line(fp):
+def display_by_line(data, fp):
     '''Print the profiler data with each sample line represented
     as one row in a table.  Sorted by self-time per line.'''
-    stats = SiteStats.buildstats(state.samples)
+    stats = SiteStats.buildstats(data.samples)
     stats.sort(reverse=True, key=lambda x: x.selfseconds())
 
     print('%5.5s %10.10s   %7.7s  %-8.8s' %
           ('%  ', 'cumulative', 'self', ''), file=fp)
     print('%5.5s  %9.9s  %8.8s  %-8.8s' %
           ("time", "seconds", "seconds", "name"), file=fp)
 
     for stat in stats:
         site = stat.site
         sitelabel = '%s:%d:%s' % (site.filename(), site.lineno, site.function)
         print('%6.2f %9.2f %9.2f  %s' % (stat.selfpercent(),
                                          stat.totalseconds(),
                                          stat.selfseconds(),
                                          sitelabel),
               file=fp)
 
-def display_by_method(fp):
+def display_by_method(data, fp):
     '''Print the profiler data with each sample function represented
     as one row in a table.  Important lines within that function are
     output as nested rows.  Sorted by self-time per line.'''
     print('%5.5s %10.10s   %7.7s  %-8.8s' %
           ('%  ', 'cumulative', 'self', ''), file=fp)
     print('%5.5s  %9.9s  %8.8s  %-8.8s' %
           ("time", "seconds", "seconds", "name"), file=fp)
 
-    stats = SiteStats.buildstats(state.samples)
+    stats = SiteStats.buildstats(data.samples)
 
     grouped = defaultdict(list)
     for stat in stats:
         grouped[stat.site.filename() + ":" + stat.site.function].append(stat)
 
     # compute sums for each function
     functiondata = []
     for fname, sitestats in grouped.iteritems():
@@ -523,29 +524,29 @@  def display_by_method(fp):
             # only show line numbers for significant locations (>1% time spent)
             if stat.selfpercent() > 1:
                 source = stat.site.getsource(25)
                 stattuple = (stat.selfpercent(), stat.selfseconds(),
                              stat.site.lineno, source)
 
                 print('%33.0f%% %6.2f   line %s: %s' % (stattuple), file=fp)
 
-def display_about_method(fp, function=None, **kwargs):
+def display_about_method(data, fp, function=None, **kwargs):
     if function is None:
         raise Exception("Invalid function")
 
     filename = None
     if ':' in function:
         filename, function = function.split(':')
 
     relevant_samples = 0
     parents = {}
     children = {}
 
-    for sample in state.samples:
+    for sample in data.samples:
         for i, site in enumerate(sample.stack):
             if site.function == function and (not filename
                 or site.filename() == filename):
                 relevant_samples += 1
                 if i != len(sample.stack) - 1:
                     parent = sample.stack[i + 1]
                     if parent in parents:
                         parents[parent] = parents[parent] + 1
@@ -559,17 +560,17 @@  def display_about_method(fp, function=No
 
     parents = [(parent, count) for parent, count in parents.iteritems()]
     parents.sort(reverse=True, key=lambda x: x[1])
     for parent, count in parents:
         print('%6.2f%%   %s:%s   line %s: %s' %
             (count / relevant_samples * 100, parent.filename(),
             parent.function, parent.lineno, parent.getsource(50)), file=fp)
 
-    stats = SiteStats.buildstats(state.samples)
+    stats = SiteStats.buildstats(data.samples)
     stats = [s for s in stats
                if s.site.function == function and
                (not filename or s.site.filename() == filename)]
 
     total_cum_sec = 0
     total_self_sec = 0
     total_self_percent = 0
     total_cum_percent = 0
@@ -592,17 +593,17 @@  def display_about_method(fp, function=No
 
     children = [(child, count) for child, count in children.iteritems()]
     children.sort(reverse=True, key=lambda x: x[1])
     for child, count in children:
         print('        %6.2f%%   line %s: %s' %
               (count / relevant_samples * 100, child.lineno,
                child.getsource(50)), file=fp)
 
-def display_hotpath(fp, limit=0.05, **kwargs):
+def display_hotpath(data, fp, limit=0.05, **kwargs):
     class HotNode(object):
         def __init__(self, site):
             self.site = site
             self.count = 0
             self.children = {}
 
         def add(self, stack, time):
             self.count += time
@@ -616,18 +617,18 @@  def display_hotpath(fp, limit=0.05, **kw
                 i = 1
                 # Skip boiler plate parts of the stack
                 while i < len(stack) and '%s:%s' % (stack[i].filename(), stack[i].function) in skips:
                     i += 1
                 if i < len(stack):
                     child.add(stack[i:], time)
 
     root = HotNode(None)
-    lasttime = state.samples[0].time
-    for sample in state.samples:
+    lasttime = data.samples[0].time
+    for sample in data.samples:
         root.add(sample.stack[::-1], sample.time - lasttime)
         lasttime = sample.time
 
     def _write(node, depth, multiple_siblings):
         site = node.site
         visiblechildren = [c for c in node.children.itervalues()
                              if c.count >= (limit * root.count)]
         if site:
@@ -664,50 +665,50 @@  def display_hotpath(fp, limit=0.05, **kw
 
         visiblechildren.sort(reverse=True, key=lambda x: x.count)
         for child in visiblechildren:
             _write(child, newdepth, len(visiblechildren) > 1)
 
     if root.count > 0:
         _write(root, 0, False)
 
-def write_to_flame(fp):
+def write_to_flame(data, fp):
     scriptpath = os.environ['HOME'] + '/flamegraph.pl'
     if not os.path.exists(scriptpath):
         print("error: missing ~/flamegraph.pl", file=fp)
         print("get it here: https://github.com/brendangregg/FlameGraph",
               file=fp)
         return
 
     fd, path = tempfile.mkstemp()
 
     file = open(path, "w+")
 
     lines = {}
-    for sample in state.samples:
+    for sample in data.samples:
         sites = [s.function for s in sample.stack]
         sites.reverse()
         line = ';'.join(sites)
         if line in lines:
             lines[line] = lines[line] + 1
         else:
             lines[line] = 1
 
     for line, count in lines.iteritems():
         file.write("%s %s\n" % (line, count))
 
     file.close()
 
     os.system("perl ~/flamegraph.pl %s > ~/flamegraph.svg" % path)
     print("Written to ~/flamegraph.svg", file=fp)
 
-def write_to_json(fp):
+def write_to_json(data, fp):
     samples = []
 
-    for sample in state.samples:
+    for sample in data.samples:
         stack = []
 
         for frame in sample.stack:
             stack.append((frame.path, frame.lineno, frame.function))
 
         samples.append((sample.time, stack))
 
     print(json.dumps(samples), file=fp)