Patchwork [6,of,6] perf: add perfrevlogrevision

login
register
mail settings
Submitter Gregory Szorc
Date Dec. 21, 2015, 4:04 a.m.
Message ID <85c0fa263fdcbdc0ee19.1450670654@ubuntu-vm-main>
Download mbox | patch
Permalink /patch/12200/
State Accepted
Headers show

Comments

Gregory Szorc - Dec. 21, 2015, 4:04 a.m.
# HG changeset patch
# User Gregory Szorc <gregory.szorc@gmail.com>
# Date 1450665501 28800
#      Sun Dec 20 18:38:21 2015 -0800
# Node ID 85c0fa263fdcbdc0ee19aa3708dce1a854c18bf9
# Parent  f43c3343c6888d7f1c960872091ebd86b3a54d3a
perf: add perfrevlogrevision

As part of investigating performance improvements to revlog reading,
I needed a mechanism to measure every part of revlog reading so I knew
where time was spent and how effective optimizations were.

This patch implements a perf command for benchmarking the various
stages of reading a single revlog revision.

When executed against a manifest revision at the end of a 30,000+
long delta chain in mozilla-central, the command demonstrates that
~80% of time is spent in zlib decompression.
Matt Mackall - Dec. 21, 2015, 6:01 p.m.
On Sun, 2015-12-20 at 20:04 -0800, Gregory Szorc wrote:
> # HG changeset patch
> # User Gregory Szorc <gregory.szorc@gmail.com>
> # Date 1450665501 28800
> #      Sun Dec 20 18:38:21 2015 -0800
> # Node ID 85c0fa263fdcbdc0ee19aa3708dce1a854c18bf9
> # Parent  f43c3343c6888d7f1c960872091ebd86b3a54d3a
> perf: add perfrevlogrevision

Looks good, queued for default. I'd really like to see sample output in
commit messages though. For people reading along at home, it looks like
this:

$ hg perfrevlogrevision -m 30000
! full
! wall 0.003581 comb 0.000000 user 0.000000 sys 0.000000 (best of 811)
! deltachain
! wall 0.000216 comb 0.000000 user 0.000000 sys 0.000000 (best of 11433)
! read
! wall 0.000090 comb 0.000000 user 0.000000 sys 0.000000 (best of 30815)
! decompress
! wall 0.002893 comb 0.000000 user 0.000000 sys 0.000000 (best of 1014)
! patch
! wall 0.000217 comb 0.000000 user 0.000000 sys 0.000000 (best of 13106)
! hash
! wall 0.000279 comb 0.000000 user 0.000000 sys 0.000000 (best of 10310)

(One wonders if the zlib code is spending a lot of time resizing Python
strings.)

-- 
Mathematics is the supreme nostalgia of our time.

Patch

diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -1,18 +1,20 @@ 
 # perf.py - performance test routines
 '''helper extension to measure performance'''
 
 from mercurial import cmdutil, scmutil, util, commands, obsolete
-from mercurial import repoview, branchmap, merge, copies, error
+from mercurial import repoview, branchmap, merge, copies, error, revlog
+from mercurial import mdiff
 import time, os, sys
 import random
 import functools
 
 formatteropts = commands.formatteropts
+revlogopts = commands.debugrevlogopts
 
 cmdtable = {}
 command = cmdutil.command(cmdtable)
 
 def getlen(ui):
     if ui.configbool("perf", "stub"):
         return lambda x: 1
     return len
@@ -483,16 +485,106 @@  def perfrevlog(ui, repo, file_, **opts):
     def d():
         r = revlog.revlog(lambda fn: open(fn, 'rb'), file_)
         for x in xrange(0, _len(r), dist):
             r.revision(r.node(x))
 
     timer(d)
     fm.end()
 
+@command('perfrevlogrevision', revlogopts + formatteropts +
+         [('', 'cache', False, 'use caches instead of clearing')],
+         '-c|-m|FILE REV')
+def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
+    """Benchmark obtaining a revlog revision.
+
+    Obtaining a revlog revision consists of roughly the following steps:
+
+    1. Compute the delta chain
+    2. Obtain the raw chunks for that delta chain
+    3. Decompress each raw chunk
+    4. Apply binary patches to obtain fulltext
+    5. Verify hash of fulltext
+
+    This command measures the time spent in each of these phases.
+    """
+    if opts.get('changelog') or opts.get('manifest'):
+        file_, rev = None, file_
+    elif rev is None:
+        raise error.CommandError('perfrevlogrevision', 'invalid arguments')
+
+    r = cmdutil.openrevlog(repo, 'perfrevlogrevision', file_, opts)
+    node = r.lookup(rev)
+    rev = r.rev(node)
+
+    def dodeltachain(rev):
+        if not cache:
+            r.clearcaches()
+        r._deltachain(rev)
+
+    def doread(chain):
+        if not cache:
+            r.clearcaches()
+        r._chunkraw(chain[0], chain[-1])
+
+    def dodecompress(data, chain):
+        if not cache:
+            r.clearcaches()
+
+        start = r.start
+        length = r.length
+        inline = r._inline
+        iosize = r._io.size
+        buffer = util.buffer
+        offset = start(chain[0])
+
+        for rev in chain:
+            chunkstart = start(rev)
+            if inline:
+                chunkstart += (rev + 1) * iosize
+            chunklength = length(rev)
+            b = buffer(data, chunkstart - offset, chunklength)
+            revlog.decompress(b)
+
+    def dopatch(text, bins):
+        if not cache:
+            r.clearcaches()
+        mdiff.patches(text, bins)
+
+    def dohash(text):
+        if not cache:
+            r.clearcaches()
+        r._checkhash(text, node, rev)
+
+    def dorevision():
+        if not cache:
+            r.clearcaches()
+        r.revision(node)
+
+    chain = r._deltachain(rev)[0]
+    data = r._chunkraw(chain[0], chain[-1])
+    bins = r._chunks(chain)
+    text = str(bins[0])
+    bins = bins[1:]
+    text = mdiff.patches(text, bins)
+
+    benches = [
+        (lambda: dorevision(), 'full'),
+        (lambda: dodeltachain(rev), 'deltachain'),
+        (lambda: doread(chain), 'read'),
+        (lambda: dodecompress(data, chain), 'decompress'),
+        (lambda: dopatch(text, bins), 'patch'),
+        (lambda: dohash(text), 'hash'),
+    ]
+
+    for fn, title in benches:
+        timer, fm = gettimer(ui, opts)
+        timer(fn, title=title)
+        fm.end()
+
 @command('perfrevset',
          [('C', 'clear', False, 'clear volatile cache between each call.'),
           ('', 'contexts', False, 'obtain changectx for each revision')]
          + formatteropts, "REVSET")
 def perfrevset(ui, repo, expr, clear=False, contexts=False, **opts):
     """benchmark the execution time of a revset
 
     Use the --clean option if need to evaluate the impact of build volatile
diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t
--- a/tests/test-contrib-perf.t
+++ b/tests/test-contrib-perf.t
@@ -86,16 +86,18 @@  perfstatus
    perfmoonwalk  benchmark walking the changelog backwards
    perfnodelookup
                  (no help text available)
    perfparents   (no help text available)
    perfpathcopies
                  (no help text available)
    perfrawfiles  (no help text available)
    perfrevlog    (no help text available)
+   perfrevlogrevision
+                 Benchmark obtaining a revlog revision.
    perfrevrange  (no help text available)
    perfrevset    benchmark the execution time of a revset
    perfstartup   (no help text available)
    perfstatus    (no help text available)
    perftags      (no help text available)
    perftemplating
                  (no help text available)
    perfvolatilesets
@@ -131,16 +133,17 @@  perfstatus
   $ hg perflrucache
   $ hg perfmanifest 2
   $ hg perfmergecalculate -r 3
   $ hg perfmoonwalk
   $ hg perfnodelookup 2
   $ hg perfpathcopies 1 2
   $ hg perfrawfiles 2
   $ hg perfrevlog .hg/store/data/a.i
+  $ hg perfrevlogrevision -m 0
   $ hg perfrevrange
   $ hg perfrevset 'all()'
   $ hg perfstartup
   $ hg perfstatus
   $ hg perftags
   $ hg perftemplating
   $ hg perfvolatilesets
   $ hg perfwalk