Patchwork [2,of,2,V2] perf: benchmark command for revlog indexes

login
register
mail settings
Submitter Gregory Szorc
Date May 28, 2017, 6:13 p.m.
Message ID <f21925c52da679317028.1495995202@ubuntu-vm-main>
Download mbox | patch
Permalink /patch/20998/
State Accepted
Headers show

Comments

Gregory Szorc - May 28, 2017, 6:13 p.m.
# HG changeset patch
# User Gregory Szorc <gregory.szorc@gmail.com>
# Date 1495995190 25200
#      Sun May 28 11:13:10 2017 -0700
# Node ID f21925c52da67931702838d0579f157ce21939bb
# Parent  1ca1ef5d942aa405bec4ae74cbcf3155a5a4033f
perf: benchmark command for revlog indexes

We didn't have explicit microbenchmark coverage for loading revlog
indexes. That seems like a useful thing to have, so let's add it.

We currently measure the low-level nodemap APIs. There is room to
hook in at the actual revlog layer. This could be done as a follow-up.

The hackiest thing about this patch is specifying revlog paths.
Other commands have arguments that allow resolution of changelog,
manifest, and filelog. I needed to hook in at a lower level of
the revlog API than what the existing helper functions to resolve
revlogs allowed. I was too lazy to write some new APIs. This could
be done as a follow-up easily enough.

Example output for `hg perfrevlogindex 00changelog.i` on my
Firefox repo (404418 revisions):

! revlog constructor
! wall 0.003106 comb 0.000000 user 0.000000 sys 0.000000 (best of 912)
! read
! wall 0.003077 comb 0.000000 user 0.000000 sys 0.000000 (best of 924)
! create index object
! wall 0.000000 comb 0.000000 user 0.000000 sys 0.000000 (best of 1803994)
! retrieve index entry for rev 0
! wall 0.000193 comb 0.000000 user 0.000000 sys 0.000000 (best of 14037)
! look up missing node
! wall 0.003313 comb 0.000000 user 0.000000 sys 0.000000 (best of 865)
! look up node at rev 0
! wall 0.003295 comb 0.010000 user 0.010000 sys 0.000000 (best of 858)
! look up node at 1/4 len
! wall 0.002598 comb 0.010000 user 0.010000 sys 0.000000 (best of 1103)
! look up node at 1/2 len
! wall 0.001909 comb 0.000000 user 0.000000 sys 0.000000 (best of 1507)
! look up node at 3/4 len
! wall 0.001213 comb 0.000000 user 0.000000 sys 0.000000 (best of 2275)
! look up node at tip
! wall 0.000453 comb 0.000000 user 0.000000 sys 0.000000 (best of 5697)
! look up all nodes (forward)
! wall 0.094615 comb 0.100000 user 0.100000 sys 0.000000 (best of 100)
! look up all nodes (reverse)
! wall 0.045889 comb 0.050000 user 0.050000 sys 0.000000 (best of 100)
! retrieve all index entries (forward)
! wall 0.078398 comb 0.080000 user 0.060000 sys 0.020000 (best of 100)
! retrieve all index entries (reverse)
! wall 0.079376 comb 0.080000 user 0.070000 sys 0.010000 (best of 100)
Yuya Nishihara - May 29, 2017, 1:26 p.m.
On Sun, 28 May 2017 11:13:22 -0700, Gregory Szorc wrote:
> # HG changeset patch
> # User Gregory Szorc <gregory.szorc@gmail.com>
> # Date 1495995190 25200
> #      Sun May 28 11:13:10 2017 -0700
> # Node ID f21925c52da67931702838d0579f157ce21939bb
> # Parent  1ca1ef5d942aa405bec4ae74cbcf3155a5a4033f
> perf: benchmark command for revlog indexes

Queued, thanks.

> @@ -61,6 +63,10 @@ try:
>      from mercurial import scmutil # since 1.9 (or 8b252e826c68)
>  except ImportError:
>      pass
> +try:
> +    from mercurial import pycompat # since 3.8 (or 6041fb8f2da8)
> +except ImportError:
> +    pass

pycompat isn't used, so removed.

> +@command('perfrevlogindex', revlogopts + formatteropts,
> +         '-c|-m|FILE')
> +def perfrevlogindex(ui, repo, file_=None, **opts):
> +    """Benchmark operations against a revlog index.
> +
> +    This tests constructing a revlog instance, reading index data,
> +    parsing index data, and performing various operations related to
> +    index data.
> +    """
> +
> +    rl = cmdutil.openrevlog(repo, 'perfrevlogindex', file_, opts)
> +
> +    opener = getattr(rl, 'opener')  # trick linter
> +    indexfile = rl.indexfile
> +    data = opener.read(indexfile)
> +
> +    header = struct.unpack('>I', data[0:4])[0]
> +    version = header & 0xFFFF
> +    if version == 1:
> +        revlogio = revlog.revlogio()
> +        inline = header & (1 << 16)
> +    else:
> +        raise error.Abort(_('unsupported revlog version: %d') % version)

dropped _() since it seems perf.py tries not to depend on i18n.

> +    rllen = len(rl)
> +
> +    node0 = rl.node(0)
> +    node25 = rl.node(rllen // 4)
> +    node50 = rl.node(rllen // 2)
> +    node75 = rl.node(rllen // 4 * 3)

Nit: '(rllen * 3) // 4' would be more accurate, but I don't care much about it.

> +    node100 = rl.node(rllen - 1)

Patch

diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -23,6 +23,7 @@  import functools
 import gc
 import os
 import random
+import struct
 import sys
 import time
 from mercurial import (
@@ -34,6 +35,7 @@  from mercurial import (
     extensions,
     mdiff,
     merge,
+    revlog,
     util,
 )
 
@@ -61,6 +63,10 @@  try:
     from mercurial import scmutil # since 1.9 (or 8b252e826c68)
 except ImportError:
     pass
+try:
+    from mercurial import pycompat # since 3.8 (or 6041fb8f2da8)
+except ImportError:
+    pass
 
 # for "historical portability":
 # define util.safehasattr forcibly, because util.safehasattr has been
@@ -857,6 +863,122 @@  def perfdiffwd(ui, repo, **opts):
         timer(d, title)
     fm.end()
 
+@command('perfrevlogindex', revlogopts + formatteropts,
+         '-c|-m|FILE')
+def perfrevlogindex(ui, repo, file_=None, **opts):
+    """Benchmark operations against a revlog index.
+
+    This tests constructing a revlog instance, reading index data,
+    parsing index data, and performing various operations related to
+    index data.
+    """
+
+    rl = cmdutil.openrevlog(repo, 'perfrevlogindex', file_, opts)
+
+    opener = getattr(rl, 'opener')  # trick linter
+    indexfile = rl.indexfile
+    data = opener.read(indexfile)
+
+    header = struct.unpack('>I', data[0:4])[0]
+    version = header & 0xFFFF
+    if version == 1:
+        revlogio = revlog.revlogio()
+        inline = header & (1 << 16)
+    else:
+        raise error.Abort(_('unsupported revlog version: %d') % version)
+
+    rllen = len(rl)
+
+    node0 = rl.node(0)
+    node25 = rl.node(rllen // 4)
+    node50 = rl.node(rllen // 2)
+    node75 = rl.node(rllen // 4 * 3)
+    node100 = rl.node(rllen - 1)
+
+    allrevs = range(rllen)
+    allrevsrev = list(reversed(allrevs))
+    allnodes = [rl.node(rev) for rev in range(rllen)]
+    allnodesrev = list(reversed(allnodes))
+
+    def constructor():
+        revlog.revlog(opener, indexfile)
+
+    def read():
+        with opener(indexfile) as fh:
+            fh.read()
+
+    def parseindex():
+        revlogio.parseindex(data, inline)
+
+    def getentry(revornode):
+        index = revlogio.parseindex(data, inline)[0]
+        index[revornode]
+
+    def getentries(revs, count=1):
+        index = revlogio.parseindex(data, inline)[0]
+
+        for i in range(count):
+            for rev in revs:
+                index[rev]
+
+    def resolvenode(node):
+        nodemap = revlogio.parseindex(data, inline)[1]
+        # This only works for the C code.
+        if nodemap is None:
+            return
+
+        try:
+            nodemap[node]
+        except error.RevlogError:
+            pass
+
+    def resolvenodes(nodes, count=1):
+        nodemap = revlogio.parseindex(data, inline)[1]
+        if nodemap is None:
+            return
+
+        for i in range(count):
+            for node in nodes:
+                try:
+                    nodemap[node]
+                except error.RevlogError:
+                    pass
+
+    benches = [
+        (constructor, 'revlog constructor'),
+        (read, 'read'),
+        (parseindex, 'create index object'),
+        (lambda: getentry(0), 'retrieve index entry for rev 0'),
+        (lambda: resolvenode('a' * 20), 'look up missing node'),
+        (lambda: resolvenode(node0), 'look up node at rev 0'),
+        (lambda: resolvenode(node25), 'look up node at 1/4 len'),
+        (lambda: resolvenode(node50), 'look up node at 1/2 len'),
+        (lambda: resolvenode(node75), 'look up node at 3/4 len'),
+        (lambda: resolvenode(node100), 'look up node at tip'),
+        # 2x variation is to measure caching impact.
+        (lambda: resolvenodes(allnodes),
+         'look up all nodes (forward)'),
+        (lambda: resolvenodes(allnodes, 2),
+         'look up all nodes 2x (forward)'),
+        (lambda: resolvenodes(allnodesrev),
+         'look up all nodes (reverse)'),
+        (lambda: resolvenodes(allnodesrev, 2),
+         'look up all nodes 2x (reverse)'),
+        (lambda: getentries(allrevs),
+         'retrieve all index entries (forward)'),
+        (lambda: getentries(allrevs, 2),
+         'retrieve all index entries 2x (forward)'),
+        (lambda: getentries(allrevsrev),
+         'retrieve all index entries (reverse)'),
+        (lambda: getentries(allrevsrev, 2),
+         'retrieve all index entries 2x (reverse)'),
+    ]
+
+    for fn, title in benches:
+        timer, fm = gettimer(ui, opts)
+        timer(fn, title=title)
+        fm.end()
+
 @command('perfrevlogrevisions', revlogopts + formatteropts +
          [('d', 'dist', 100, 'distance between the revisions'),
           ('s', 'startrev', 0, 'revision to start reading at'),
diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t
--- a/tests/test-contrib-perf.t
+++ b/tests/test-contrib-perf.t
@@ -97,6 +97,8 @@  perfstatus
    perfrawfiles  (no help text available)
    perfrevlogchunks
                  Benchmark operations on revlog chunks.
+   perfrevlogindex
+                 Benchmark operations against a revlog index.
    perfrevlogrevision
                  Benchmark obtaining a revlog revision.
    perfrevlogrevisions
@@ -147,6 +149,7 @@  perfstatus
   $ hg perfnodelookup 2
   $ hg perfpathcopies 1 2
   $ hg perfrawfiles 2
+  $ hg perfrevlogindex -c
   $ hg perfrevlogrevisions .hg/store/data/a.i
   $ hg perfrevlogrevision -m 0
   $ hg perfrevlogchunks -c