Patchwork D10225: perf-helpers: add a search-discovery-case script

login
register
mail settings
Submitter phabricator
Date March 15, 2021, 11:01 p.m.
Message ID <differential-rev-PHID-DREV-lncxu5gi3vkze7n4ctkc-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48538/
State Superseded
Headers show

Comments

phabricator - March 15, 2021, 11:01 p.m.
marmoute created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  This is a small script I built to look for interesting discovery cases. It is
  fairly basic but could be useful in various situations, so let's put it in the
  main repository.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10225

AFFECTED FILES
  contrib/perf-utils/search-discovery-case

CHANGE DETAILS




To: marmoute, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/contrib/perf-utils/search-discovery-case b/contrib/perf-utils/search-discovery-case
new file mode 100755
--- /dev/null
+++ b/contrib/perf-utils/search-discovery-case
@@ -0,0 +1,197 @@ 
+#!/usr/bin/env python3
+# Search for interesting discovery instances
+#
+#  search-discovery-case REPO [REPO]…
+#
+# This uses a subsetmaker extension (next to this script) to generate a stream
+# of random discovery instances. When interesting cases are discovered,
+# information about them is printed on stdout.
+from __future__ import print_function
+
+import json
+import os
+import queue
+import random
+import signal
+import subprocess
+import sys
+import threading
+
+# Locate the Mercurial checkout relative to this script: the checkout root is
+# taken to be two directories above the directory holding the script, and the
+# `hg` executable is expected directly inside that root.
+this_script = os.path.abspath(sys.argv[0])
+this_dir = os.path.dirname(this_script)
+hg_dir = os.path.join(this_dir, '..', '..')
+HG_REPO = os.path.normpath(hg_dir)
+HG_BIN = os.path.join(HG_REPO, 'hg')
+
+# Number of worker threads: the value of the NUMBER_OF_PROCESSORS environment
+# variable when it is set, 8 otherwise.
+JOB = int(os.environ.get('NUMBER_OF_PROCESSORS', 8))
+
+
+# Kinds of subset description that `pick_one` chooses from.
+SLICING = ('scratch', 'randomantichain', 'rev')
+
+
+def nb_revs(repo_path):
+    cmd = [
+        HG_BIN,
+        '--repository',
+        repo_path,
+        'log',
+        '--template',
+        '{rev}',
+        '--rev',
+        'tip',
+    ]
+    s = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+    out, err = s.communicate()
+    return int(out)
+
+
+repos = []
+for repo in sys.argv[1:]:
+    size = nb_revs(repo)
+    repos.append((repo, size))
+
+
+def pick_one(repo):
+    pick = random.choice(SLICING)
+    seed = random.randint(0, 100000)
+    if pick == 'scratch':
+        start = int(repo[1] * 0.3)
+        end = int(repo[1] * 0.7)
+        nb = random.randint(start, end)
+        return ('scratch', nb, seed)
+    elif pick == 'randomantichain':
+        return ('randomantichain', seed)
+    elif pick == 'rev':
+        start = int(repo[1] * 0.3)
+        end = int(repo[1])
+        rev = random.randint(start, end)
+        return ('rev', rev)
+    else:
+        assert False
+
+
+# Set by `end` (the SIGINT handler) to ask the main loop and the workers to
+# stop.
+done = threading.Event()
+# Cases waiting for a worker; bounded so that the generator cannot run far
+# ahead of the workers.
+cases = queue.Queue(maxsize=10 * JOB)
+# Discovery outputs produced by the workers and read by the main loop.
+results = queue.Queue()
+
+
+def worker():
+    while not done.is_set():
+        c = cases.get()
+        if c is None:
+            return
+        try:
+            res = process(c)
+            results.put(res)
+        except Exception as exc:
+            print('processing-failed: %s %s' % (c, exc), file=sys.stderr)
+        c = (c[0], c[2], c[1])
+        try:
+            res = process(c)
+            results.put(res)
+        except Exception as exc:
+            print('processing-failed: %s %s' % (c, exc), file=sys.stderr)
+
+
+# Path of the subsetmaker extension shipped in contrib/perf-utils of the
+# checkout; presumably it provides the `scratch` and `randomantichain` revsets
+# that `to_revsets` emits (verify against subsetmaker.py).
+SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')
+
+
+# Common prefix of every discovery command: `hg debugdiscovery` with JSON
+# output and the subsetmaker extension enabled. `process` appends the
+# repository and the two revsets.
+CMD_BASE = (
+    HG_BIN,
+    'debugdiscovery',
+    '--template',
+    'json',
+    '--config',
+    'extensions.subset=%s' % SUBSET_PATH,
+)
+#    '--local-as-revs "$left" --local-as-revs "$right"'
+#    > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt
+#        )
+
+
+def to_revsets(case):
+    t = case[0]
+    if t == 'scratch':
+        return 'not scratch(all(), %d, "%d")' % (case[1], case[2])
+    elif t == 'randomantichain':
+        return '::randomantichain(all(), "%d")' % case[1]
+    elif t == 'rev':
+        return '::%d' % case[1]
+    else:
+        assert False
+
+
+def process(case):
+    (repo, left, right) = case
+    cmd = list(CMD_BASE)
+    cmd.append('-R')
+    cmd.append(repo[0])
+    cmd.append('--local-as-revs')
+    cmd.append(to_revsets(left))
+    cmd.append('--remote-as-revs')
+    cmd.append(to_revsets(right))
+    s = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+    out, err = s.communicate()
+    return json.loads(out)[0]
+
+
+def interesting_boundary(res):
+    """check if a case is interesting or not
+
+    For now we are mostly interrested in case were we do multiple roundstrip
+    and where the boundary is somewhere in the middle of the undecided set.
+
+    Ideally, we would make this configurable, but this is not a focus for now
+
+    return None or (round-trip, undecided-common, undecided-missing)
+    """
+    roundtrips = res["total-roundtrips"]
+    if roundtrips <= 1:
+        return None
+    undecided_common = res["nb-ini_und-common"]
+    undecided_missing = res["nb-ini_und-missing"]
+    if undecided_common == 0:
+        return None
+    if undecided_missing == 0:
+        return None
+    return (roundtrips, undecided_common, undecided_missing)
+
+
+def end(*args, **kwargs):
+    """Request shutdown by setting the `done` event.
+
+    Installed as the SIGINT handler; whatever arguments it is called with are
+    ignored.
+    """
+    done.set()
+
+
+def format_case(case):
+    return '-'.join(str(s) for s in case)
+
+
+signal.signal(signal.SIGINT, end)
+
+for i in range(JOB):
+    threading.Thread(target=worker).start()
+
+nb_cases = 0
+while not done.is_set():
+    repo = random.choice(repos)
+    left = pick_one(repo)
+    right = pick_one(repo)
+    cases.put((repo, left, right))
+    while not results.empty():
+        # results has a single reader so this is fine
+        res = results.get_nowait()
+        boundary = interesting_boundary(res)
+        if boundary is not None:
+            print(repo[0], format_case(left), format_case(right), *boundary)
+
+    nb_cases += 1
+    if not nb_cases % 100:
+        print('[%d cases generated]' % nb_cases, file=sys.stderr)
+
+for i in range(JOB):
+    try:
+        cases.put_nowait(None)
+    except queue.Full:
+        pass
+
+print('[%d cases generated]' % nb_cases, file=sys.stderr)
+print('[ouput generation is over]' % nb_cases, file=sys.stderr)