Patchwork [5,of,5] perf: add a `perfhelper-mergecopies` command

login
register
mail settings
Submitter Pierre-Yves David
Date May 23, 2019, 4:47 p.m.
Message ID <72771aee91364de80511.1558630079@nodosa.octopoid.net>
Download mbox | patch
Permalink /patch/40216/
State Accepted
Headers show

Comments

Pierre-Yves David - May 23, 2019, 4:47 p.m.
# HG changeset patch
# User Pierre-Yves David <pierre-yves.david@octobus.net>
# Date 1558628108 -7200
#      Thu May 23 18:15:08 2019 +0200
# Node ID 72771aee91364de80511c22b356b724cde35f73f
# Parent  2a0e626bc8f1b7ac1cf38789fcc32876f8684f91
# EXP-Topic perf-mergecopies
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 72771aee9136
perf: add a `perfhelper-mergecopies` command

This command gathers data that is useful for picking arguments for `perfmergecopies`.
Augie Fackler - May 28, 2019, 1:50 p.m.
Queued, with copyediting both in commit messages and docstrings.

> On May 23, 2019, at 12:47, Pierre-Yves David <pierre-yves.david@ens-lyon.org> wrote:
> 
> # HG changeset patch
> # User Pierre-Yves David <pierre-yves.david@octobus.net>
> # Date 1558628108 -7200
> #      Thu May 23 18:15:08 2019 +0200
> # Node ID 72771aee91364de80511c22b356b724cde35f73f
> # Parent  2a0e626bc8f1b7ac1cf38789fcc32876f8684f91
> # EXP-Topic perf-mergecopies
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 72771aee9136
> perf: add a `perfhelper-mergecopies` command
> 
> This command gathers data that is useful for picking arguments for `perfmergecopies`.
> 
> diff --git a/contrib/perf.py b/contrib/perf.py
> --- a/contrib/perf.py
> +++ b/contrib/perf.py
> @@ -1430,6 +1430,111 @@ def perftemplating(ui, repo, testedtempl
>     timer(format)
>     fm.end()
> 
> +@command(b'perfhelper-mergecopies', formatteropts +
> +         [
> +          (b'r', b'revs', [], b'restrict search to these revisions'),
> +          (b'', b'timing', False, b'provides extra data (costly)'),
> +         ])
> +def perfhelpermergecopies(ui, repo, revs=[], **opts):
> +    """find statistic about potential parameters for `perfmergecopies`
> +
> +    This command finds (base, p1, p2) triplets relevant for copytracing
> +    benchmarking in the context of a merge. It reports values for some of
> +    the parameters that impact copy tracing time during a merge.
> +
> +    If `--timing` is set, rename detection is run and the associated timing
> +    will be reported. The extra details come at the cost of a slower command
> +    execution.
> +
> +    Since the rename detection is only run once, other factors might easily
> +    affect the precision of the timing. However it should give a good
> +    approximation of which revision triplets are very costly.
> +    """
> +    opts = _byteskwargs(opts)
> +    fm = ui.formatter(b'perf', opts)
> +    dotiming = opts[b'timing']
> +
> +    output_template = [
> +        ("base", "%(base)12s"),
> +        ("p1", "%(p1.node)12s"),
> +        ("p2", "%(p2.node)12s"),
> +        ("p1.nb-revs", "%(p1.nbrevs)12d"),
> +        ("p1.nb-files", "%(p1.nbmissingfiles)12d"),
> +        ("p1.renames", "%(p1.renamedfiles)12d"),
> +        ("p1.time", "%(p1.time)12.3f"),
> +        ("p2.nb-revs", "%(p2.nbrevs)12d"),
> +        ("p2.nb-files", "%(p2.nbmissingfiles)12d"),
> +        ("p2.renames", "%(p2.renamedfiles)12d"),
> +        ("p2.time", "%(p2.time)12.3f"),
> +        ("renames", "%(nbrenamedfiles)12d"),
> +        ("total.time", "%(time)12.3f"),
> +        ]
> +    if not dotiming:
> +        output_template = [i for i in output_template
> +                           if not ('time' in i[0] or 'renames' in i[0])]
> +    header_names = [h for (h, v) in output_template]
> +    output = ' '.join([v for (h, v) in output_template]) + '\n'
> +    header = ' '.join(['%12s'] * len(header_names)) + '\n'
> +    fm.plain(header % tuple(header_names))
> +
> +    if not revs:
> +        revs = ['all()']
> +    revs = scmutil.revrange(repo, revs)
> +
> +    roi = repo.revs('merge() and %ld', revs)
> +    for r in roi:
> +        ctx = repo[r]
> +        p1 = ctx.p1()
> +        p2 = ctx.p2()
> +        bases = repo.changelog._commonancestorsheads(p1.rev(), p2.rev())
> +        for b in bases:
> +            b = repo[b]
> +            p1missing = copies._computeforwardmissing(b, p1)
> +            p2missing = copies._computeforwardmissing(b, p2)
> +            data = {
> +                b'base': b.hex(),
> +                b'p1.node': p1.hex(),
> +                b'p1.nbrevs': len(repo.revs('%d::%d', b.rev(), p1.rev())),
> +                b'p1.nbmissingfiles': len(p1missing),
> +                b'p2.node': p2.hex(),
> +                b'p2.nbrevs': len(repo.revs('%d::%d', b.rev(), p2.rev())),
> +                b'p2.nbmissingfiles': len(p2missing),
> +            }
> +            if dotiming:
> +                begin = util.timer()
> +                mergedata = copies.mergecopies(repo, p1, p2, b)
> +                end = util.timer()
> +                # not very stable timing since we did only one run
> +                data['time'] = end - begin
> +                # mergedata holds five dicts: "copy", "movewithdir", "diverge",
> +                # "renamedelete" and "dirmove"; the first 4 are about renames.
> +                renames = len(mergedata[0])
> +                renames += len(mergedata[1])
> +                renames += len(mergedata[2])
> +                renames += len(mergedata[3])
> +                data['nbrenamedfiles'] = renames
> +                begin = util.timer()
> +                p1renames = copies.pathcopies(b, p1)
> +                end = util.timer()
> +                data['p1.time'] = end - begin
> +                begin = util.timer()
> +                p2renames = copies.pathcopies(b, p2)
> +                # read the clock before computing the p2 duration
> +                end = util.timer()
> +                data['p2.time'] = end - begin
> +                data['p1.renamedfiles'] = len(p1renames)
> +                data['p2.renamedfiles'] = len(p2renames)
> +            fm.startitem()
> +            fm.data(**data)
> +            # make node pretty for the human output
> +            out = data.copy()
> +            out['base'] = fm.hexfunc(b.node())
> +            out['p1.node'] = fm.hexfunc(p1.node())
> +            out['p2.node'] = fm.hexfunc(p2.node())
> +            fm.plain(output % out)
> +
> +    fm.end()
> +
> @command(b'perfhelper-pathcopies', formatteropts +
>          [
>           (b'r', b'revs', [], b'restrict search to these revisions'),
> diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t
> --- a/tests/test-contrib-perf.t
> +++ b/tests/test-contrib-perf.t
> @@ -117,6 +117,8 @@ perfstatus
>    perffncachewrite
>                  (no help text available)
>    perfheads     benchmark the computation of a changelog heads
> +   perfhelper-mergecopies
> +                 find statistic about potential parameters for 'perfmergecopies'
>    perfhelper-pathcopies
>                  find statistic about potential parameters for the
>                  'perftracecopies'
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Patch

diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -1430,6 +1430,111 @@  def perftemplating(ui, repo, testedtempl
     timer(format)
     fm.end()
 
+@command(b'perfhelper-mergecopies', formatteropts +
+         [
+          (b'r', b'revs', [], b'restrict search to these revisions'),
+          (b'', b'timing', False, b'provides extra data (costly)'),
+         ])
+def perfhelpermergecopies(ui, repo, revs=[], **opts):
+    """find statistic about potential parameters for `perfmergecopies`
+
+    This command finds (base, p1, p2) triplets relevant for copytracing
+    benchmarking in the context of a merge. It reports values for some of
+    the parameters that impact copy tracing time during a merge.
+
+    If `--timing` is set, rename detection is run and the associated timing
+    will be reported. The extra details come at the cost of a slower command
+    execution.
+
+    Since the rename detection is only run once, other factors might easily
+    affect the precision of the timing. However it should give a good
+    approximation of which revision triplets are very costly.
+    """
+    opts = _byteskwargs(opts)
+    fm = ui.formatter(b'perf', opts)
+    dotiming = opts[b'timing']
+
+    output_template = [
+        ("base", "%(base)12s"),
+        ("p1", "%(p1.node)12s"),
+        ("p2", "%(p2.node)12s"),
+        ("p1.nb-revs", "%(p1.nbrevs)12d"),
+        ("p1.nb-files", "%(p1.nbmissingfiles)12d"),
+        ("p1.renames", "%(p1.renamedfiles)12d"),
+        ("p1.time", "%(p1.time)12.3f"),
+        ("p2.nb-revs", "%(p2.nbrevs)12d"),
+        ("p2.nb-files", "%(p2.nbmissingfiles)12d"),
+        ("p2.renames", "%(p2.renamedfiles)12d"),
+        ("p2.time", "%(p2.time)12.3f"),
+        ("renames", "%(nbrenamedfiles)12d"),
+        ("total.time", "%(time)12.3f"),
+        ]
+    if not dotiming:
+        output_template = [i for i in output_template
+                           if not ('time' in i[0] or 'renames' in i[0])]
+    header_names = [h for (h, v) in output_template]
+    output = ' '.join([v for (h, v) in output_template]) + '\n'
+    header = ' '.join(['%12s'] * len(header_names)) + '\n'
+    fm.plain(header % tuple(header_names))
+
+    if not revs:
+        revs = ['all()']
+    revs = scmutil.revrange(repo, revs)
+
+    roi = repo.revs('merge() and %ld', revs)
+    for r in roi:
+        ctx = repo[r]
+        p1 = ctx.p1()
+        p2 = ctx.p2()
+        bases = repo.changelog._commonancestorsheads(p1.rev(), p2.rev())
+        for b in bases:
+            b = repo[b]
+            p1missing = copies._computeforwardmissing(b, p1)
+            p2missing = copies._computeforwardmissing(b, p2)
+            data = {
+                b'base': b.hex(),
+                b'p1.node': p1.hex(),
+                b'p1.nbrevs': len(repo.revs('%d::%d', b.rev(), p1.rev())),
+                b'p1.nbmissingfiles': len(p1missing),
+                b'p2.node': p2.hex(),
+                b'p2.nbrevs': len(repo.revs('%d::%d', b.rev(), p2.rev())),
+                b'p2.nbmissingfiles': len(p2missing),
+            }
+            if dotiming:
+                begin = util.timer()
+                mergedata = copies.mergecopies(repo, p1, p2, b)
+                end = util.timer()
+                # not very stable timing since we did only one run
+                data['time'] = end - begin
+                # mergedata holds five dicts: "copy", "movewithdir", "diverge",
+                # "renamedelete" and "dirmove"; the first 4 are about renames.
+                renames = len(mergedata[0])
+                renames += len(mergedata[1])
+                renames += len(mergedata[2])
+                renames += len(mergedata[3])
+                data['nbrenamedfiles'] = renames
+                begin = util.timer()
+                p1renames = copies.pathcopies(b, p1)
+                end = util.timer()
+                data['p1.time'] = end - begin
+                begin = util.timer()
+                p2renames = copies.pathcopies(b, p2)
+                # read the clock before computing the p2 duration
+                end = util.timer()
+                data['p2.time'] = end - begin
+                data['p1.renamedfiles'] = len(p1renames)
+                data['p2.renamedfiles'] = len(p2renames)
+            fm.startitem()
+            fm.data(**data)
+            # make node pretty for the human output
+            out = data.copy()
+            out['base'] = fm.hexfunc(b.node())
+            out['p1.node'] = fm.hexfunc(p1.node())
+            out['p2.node'] = fm.hexfunc(p2.node())
+            fm.plain(output % out)
+
+    fm.end()
+
 @command(b'perfhelper-pathcopies', formatteropts +
          [
           (b'r', b'revs', [], b'restrict search to these revisions'),
diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t
--- a/tests/test-contrib-perf.t
+++ b/tests/test-contrib-perf.t
@@ -117,6 +117,8 @@  perfstatus
    perffncachewrite
                  (no help text available)
    perfheads     benchmark the computation of a changelog heads
+   perfhelper-mergecopies
+                 find statistic about potential parameters for 'perfmergecopies'
    perfhelper-pathcopies
                  find statistic about potential parameters for the
                  'perftracecopies'