Patchwork [05,of,10] phabricator: rework phabread to reduce memory usage and round-trips

login
register
mail settings
Submitter Jun Wu
Date July 5, 2017, 1:58 a.m.
Message ID <71433de5157b49dfbb58.1499219910@x1c>
Download mbox | patch
Permalink /patch/22001/
State Accepted
Headers show

Comments

Jun Wu - July 5, 2017, 1:58 a.m.
# HG changeset patch
# User Jun Wu <quark@fb.com>
# Date 1499211408 25200
#      Tue Jul 04 16:36:48 2017 -0700
# Node ID 71433de5157b49dfbb58ebe638a5c5ae780763dc
# Parent  e7adcd5a2c5715dc9327d03a3fc22ec65af495d5
# Available At https://bitbucket.org/quark-zju/hg-draft
#              hg pull https://bitbucket.org/quark-zju/hg-draft -r 71433de5157b
phabricator: rework phabread to reduce memory usage and round-trips

This patch reworked phabread a bit so it fetches the lightweight
"Differential Revision" metadata for a stack first. Then read other data.

This allows the code to:

  a) send 1 request to get all `hg:meta` data instead of N requests
  b) patches are read in desired order, no need to buffer the output

"b)" reduces the memory usage from O(N^2) to O(N) since we no longer keep
old patch contents in memory.

The above `N` means the number of patches in the stack.

Patch

diff --git a/contrib/phabricator.py b/contrib/phabricator.py
--- a/contrib/phabricator.py
+++ b/contrib/phabricator.py
@@ -315,48 +315,104 @@  def phabsend(ui, repo, *revs, **opts):
                               (r'node', 'Node ID'), (r'parent', 'Parent ')])
 
-def readpatch(repo, params, recursive=False):
+def querydrev(repo, params, stack=False):
+    """return a list of "Differential Revision" dicts
+
+    params is the input of "differential.query" API, and is expected to match
+    just a single Differential Revision.
+
+    A "Differential Revision dict" looks like:
+
+        {
+            "id": "2",
+            "phid": "PHID-DREV-672qvysjcczopag46qty",
+            "title": "example",
+            "uri": "https://phab.example.com/D2",
+            "dateCreated": "1499181406",
+            "dateModified": "1499182103",
+            "authorPHID": "PHID-USER-tv3ohwc4v4jeu34otlye",
+            "status": "0",
+            "statusName": "Needs Review",
+            "properties": [],
+            "branch": null,
+            "summary": "",
+            "testPlan": "",
+            "lineCount": "2",
+            "activeDiffPHID": "PHID-DIFF-xoqnjkobbm6k4dk6hi72",
+            "diffs": [
+              "3",
+              "4",
+            ],
+            "commits": [],
+            "reviewers": [],
+            "ccs": [],
+            "hashes": [],
+            "auxiliary": {
+              "phabricator:projects": [],
+              "phabricator:depends-on": [
+                "PHID-DREV-gbapp366kutjebt7agcd"
+              ]
+            },
+            "repositoryPHID": "PHID-REPO-hub2hx62ieuqeheznasv",
+            "sourcePath": null
+        }
+
+    If stack is True, return a list of "Differential Revision dict"s in an
+    order that the latter ones depend on the former ones. Otherwise, return a
+    list of a unique "Differential Revision dict".
+    """
+    result = []
+    queue = [params]
+    while queue:
+        params = queue.pop()
+        drevs = callconduit(repo, 'differential.query', params)
+        if len(drevs) != 1:
+            raise error.Abort(_('cannot get Differential Revision %r') % params)
+        drev = drevs[0]
+        result.append(drev)
+        if stack:
+            auxiliary = drev.get(r'auxiliary', {})
+            depends = auxiliary.get(r'phabricator:depends-on', [])
+            for phid in depends:
+                queue.append({'phids': [phid]})
+    result.reverse()
+    return result
+
+def readpatch(repo, params, write, stack=False):
     """generate plain-text patch readable by 'hg import'
 
-    params is passed to "differential.query". If recursive is True, also return
-    dependent patches.
+    write is usually ui.write. params is passed to "differential.query". If
+    stack is True, also write dependent patches.
     """
     # Differential Revisions
-    drevs = callconduit(repo, 'differential.query', params)
-    if len(drevs) == 1:
-        drev = drevs[0]
-    else:
-        raise error.Abort(_('cannot get Differential Revision %r') % params)
-
-    repo.ui.note(_('reading D%s\n') % drev[r'id'])
+    drevs = querydrev(repo, params, stack)
 
-    diffid = max(int(v) for v in drev[r'diffs'])
-    body = callconduit(repo, 'differential.getrawdiff', {'diffID': diffid})
-    desc = callconduit(repo, 'differential.getcommitmessage',
-                       {'revision_id': drev[r'id']})
-    header = '# HG changeset patch\n'
+    # Prefetch hg:meta property for all diffs
+    diffids = sorted(set(max(int(v) for v in drev[r'diffs']) for drev in drevs))
+    diffs = callconduit(repo, 'differential.querydiffs', {'ids': diffids})
 
-    # Remove potential empty "Summary:"
-    desc = _summaryre.sub('', desc)
+    # Generate patch for each drev
+    for drev in drevs:
+        repo.ui.note(_('reading D%s\n') % drev[r'id'])
 
-    # Try to preserve metadata from hg:meta property. Write hg patch headers
-    # that can be read by the "import" command. See patchheadermap and extract
-    # in mercurial/patch.py for supported headers.
-    diffs = callconduit(repo, 'differential.querydiffs', {'ids': [diffid]})
-    props = diffs[str(diffid)][r'properties'] # could be empty list or dict
-    if props and r'hg:meta' in props:
-        meta = props[r'hg:meta']
-        for k in _metanamemap.keys():
-            if k in meta:
-                header += '# %s %s\n' % (_metanamemap[k], meta[k])
+        diffid = max(int(v) for v in drev[r'diffs'])
+        body = callconduit(repo, 'differential.getrawdiff', {'diffID': diffid})
+        desc = callconduit(repo, 'differential.getcommitmessage',
+                           {'revision_id': drev[r'id']})
+        header = '# HG changeset patch\n'
+
+        # Remove potential empty "Summary:"
+        desc = _summaryre.sub('', desc)
 
-    patch = ('%s%s\n%s') % (header, desc, body)
+        # Try to preserve metadata from hg:meta property. Write hg patch
+        # headers that can be read by the "import" command. See patchheadermap
+        # and extract in mercurial/patch.py for supported headers.
+        props = diffs[str(diffid)][r'properties'] # could be empty list or dict
+        if props and r'hg:meta' in props:
+            meta = props[r'hg:meta']
+            for k in _metanamemap.keys():
+                if k in meta:
+                    header += '# %s %s\n' % (_metanamemap[k], meta[k])
 
-    # Check dependencies
-    if recursive:
-        auxiliary = drev.get(r'auxiliary', {})
-        depends = auxiliary.get(r'phabricator:depends-on', [])
-        for phid in depends:
-            patch = readpatch(repo, {'phids': [phid]}, recursive=True) + patch
-    return patch
+        write(('%s%s\n%s') % (header, desc, body))
 
 @command('phabread',
@@ -375,4 +431,3 @@  def phabread(ui, repo, revid, **opts):
     except ValueError:
         raise error.Abort(_('invalid Revision ID: %s') % revid)
-    patch = readpatch(repo, {'ids': [revid]}, recursive=opts.get('stack'))
-    ui.write(patch)
+    readpatch(repo, {'ids': [revid]}, ui.write, opts.get('stack'))