Patchwork D8697: phases: sparsify phase lists

login
register
mail settings
Submitter phabricator
Date July 7, 2020, 10:39 p.m.
Message ID <differential-rev-PHID-DREV-g36d6z257lza5fzeawri-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/46651/
State Superseded
Headers show

Comments

phabricator - July 7, 2020, 10:39 p.m.
joerg.sonnenberger created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  When the internal and archived phase was added, allphase became a large,
  sparsely populated list. This dramatically increased the number of
  lookup operations for public relations in `phasecache.phase`. As a first
  step, define allphases and related lists explicitly to contain only the
  actual phases. Make phasenames a dictionary and create corresponding
  dictionaries for mapping phase names back to numbers. Adjust various
  list to be sparse as well with the exception of phaseroots and phasesets
  members of phasecache. Keep those as a separate step as it involves
  changes to the C module.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D8697

AFFECTED FILES
  mercurial/bundle2.py
  mercurial/exchange.py
  mercurial/exchangev2.py
  mercurial/phases.py

CHANGE DETAILS




To: joerg.sonnenberger, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/mercurial/phases.py b/mercurial/phases.py
--- a/mercurial/phases.py
+++ b/mercurial/phases.py
@@ -128,25 +128,28 @@ 
 
 _fphasesentry = struct.Struct(b'>i20s')
 
-INTERNAL_FLAG = 64  # Phases for mercurial internal usage only
-HIDEABLE_FLAG = 32  # Phases that are hideable
-
 # record phase index
 public, draft, secret = range(3)
-internal = INTERNAL_FLAG | HIDEABLE_FLAG
-archived = HIDEABLE_FLAG
-allphases = list(range(internal + 1))
-trackedphases = allphases[1:]
+archived = 32  # non-continuous for compatibility
+internal = 96  # non-continuous for compatibility
+allphases = (public, draft, secret, archived, internal)
+trackedphases = (draft, secret, archived, internal)
 # record phase names
 cmdphasenames = [b'public', b'draft', b'secret']  # known to `hg phase` command
-phasenames = [None] * len(allphases)
-phasenames[: len(cmdphasenames)] = cmdphasenames
+phasenames = dict(enumerate(cmdphasenames))
 phasenames[archived] = b'archived'
 phasenames[internal] = b'internal'
+# map phase name to phase number
+phasenumber = {name: phase for phase, name in phasenames.items()}
+# like phasenumber, but also include maps for the numeric and binary
+# phase number to the phase number
+phasenumber2 = phasenumber.copy()
+phasenumber2.update({phase: phase for phase in phasenames})
+phasenumber2.update({b'%i' % phase: phase for phase in phasenames})
 # record phase property
-mutablephases = tuple(allphases[1:])
-remotehiddenphases = tuple(allphases[2:])
-localhiddenphases = tuple(p for p in allphases if p & HIDEABLE_FLAG)
+mutablephases = (draft, secret, archived, internal)
+remotehiddenphases = (secret, archived, internal)
+localhiddenphases = (internal, archived)
 
 
 def supportinternal(repo):
@@ -167,7 +170,7 @@ 
     """
     repo = repo.unfiltered()
     dirty = False
-    roots = [set() for i in allphases]
+    roots = [set() for i in range(max(allphases) + 1)]
     try:
         f, pending = txnutil.trypending(repo.root, repo.svfs, b'phaseroots')
         try:
@@ -189,11 +192,10 @@ 
 def binaryencode(phasemapping):
     """encode a 'phase -> nodes' mapping into a binary stream
 
-    Since phases are integer the mapping is actually a python list:
-    [[PUBLIC_HEADS], [DRAFTS_HEADS], [SECRET_HEADS]]
+    The revision lists are encoded as (phase, root) pairs.
     """
     binarydata = []
-    for phase, nodes in enumerate(phasemapping):
+    for phase, nodes in pycompat.iteritems(phasemapping):
         for head in nodes:
             binarydata.append(_fphasesentry.pack(phase, head))
     return b''.join(binarydata)
@@ -202,8 +204,9 @@ 
 def binarydecode(stream):
     """decode a binary stream into a 'phase -> nodes' mapping
 
-    Since phases are integer the mapping is actually a python list."""
-    headsbyphase = [[] for i in allphases]
+    The (phase, root) pairs are turned back into a dictionary with
+    the phase as index and the aggregated roots of that phase as value."""
+    headsbyphase = {i: [] for i in allphases}
     entrysize = _fphasesentry.size
     while True:
         entry = stream.read(entrysize)
@@ -427,12 +430,16 @@ 
             nativeroots.append(
                 pycompat.maplist(repo.changelog.rev, self.phaseroots[phase])
             )
-        return repo.changelog.computephases(nativeroots)
+        revslen, phasesets = repo.changelog.computephases(nativeroots)
+        phasesets2 = [set() for phase in range(max(allphases) + 1)]
+        for phase, phaseset in zip(allphases, phasesets):
+            phasesets2[phase] = phaseset
+        return revslen, phasesets2
 
     def _computephaserevspure(self, repo):
         repo = repo.unfiltered()
         cl = repo.changelog
-        self._phasesets = [set() for phase in allphases]
+        self._phasesets = [set() for phase in range(max(allphases) + 1)]
         lowerroots = set()
         for phase in reversed(trackedphases):
             roots = pycompat.maplist(cl.rev, self.phaseroots[phase])
@@ -533,7 +540,7 @@ 
 
         changes = set()  # set of revisions to be changed
         delroots = []  # set of root deleted by this path
-        for phase in pycompat.xrange(targetphase + 1, len(allphases)):
+        for phase in (phase for phase in allphases if phase > targetphase):
             # filter nodes that are not in a compatible phase already
             nodes = [
                 n for n in nodes if self.phase(repo, repo[n].rev()) >= phase
@@ -766,7 +773,7 @@ 
     """
     cl = repo.changelog
 
-    headsbyphase = [[] for i in allphases]
+    headsbyphase = {i: [] for i in allphases}
     # No need to keep track of secret phase; any heads in the subset that
     # are not mentioned are implicitly secret.
     for phase in allphases[:secret]:
@@ -897,13 +904,11 @@ 
     """
     v = ui.config(b'phases', b'new-commit')
     try:
-        return phasenames.index(v)
-    except ValueError:
-        try:
-            return int(v)
-        except ValueError:
-            msg = _(b"phases.new-commit: not a valid phase name ('%s')")
-            raise error.ConfigError(msg % v)
+        return phasenumber2[v]
+    except KeyError:
+        raise error.ConfigError(
+            _(b"phases.new-commit: not a valid phase name ('%s')") % v
+        )
 
 
 def hassecret(repo):
diff --git a/mercurial/exchangev2.py b/mercurial/exchangev2.py
--- a/mercurial/exchangev2.py
+++ b/mercurial/exchangev2.py
@@ -82,15 +82,12 @@ 
         phases.registernew(repo, tr, phases.draft, csetres[b'added'])
 
     # And adjust the phase of all changesets accordingly.
-    for phase in phases.phasenames:
+    for phasenumber, phase in phases.phasenames.items():
         if phase == b'secret' or not csetres[b'nodesbyphase'][phase]:
             continue
 
         phases.advanceboundary(
-            repo,
-            tr,
-            phases.phasenames.index(phase),
-            csetres[b'nodesbyphase'][phase],
+            repo, tr, phasenumber, csetres[b'nodesbyphase'][phase],
         )
 
     # Write bookmark updates.
@@ -361,7 +358,7 @@ 
         # so we can set the linkrev accordingly when manifests are added.
         manifestnodes[cl.rev(node)] = revision.manifest
 
-    nodesbyphase = {phase: set() for phase in phases.phasenames}
+    nodesbyphase = {phase: set() for phase in phases.phasenames.values()}
     remotebookmarks = {}
 
     # addgroup() expects a 7-tuple describing revisions. This normalizes
diff --git a/mercurial/exchange.py b/mercurial/exchange.py
--- a/mercurial/exchange.py
+++ b/mercurial/exchange.py
@@ -1024,12 +1024,12 @@ 
     hasphaseheads = b'heads' in b2caps.get(b'phases', ())
     if pushop.remotephases is not None and hasphaseheads:
         # check that the remote phase has not changed
-        checks = [[] for p in phases.allphases]
+        checks = {p: [] for p in phases.allphases}
         checks[phases.public].extend(pushop.remotephases.publicheads)
         checks[phases.draft].extend(pushop.remotephases.draftroots)
-        if any(checks):
-            for nodes in checks:
-                nodes.sort()
+        if any(pycompat.itervalues(checks)):
+            for phase in checks:
+                checks[phase].sort()
             checkdata = phases.binaryencode(checks)
             bundler.newpart(b'check:phases', data=checkdata)
 
@@ -1104,7 +1104,7 @@ 
     """push phase information through a bundle2 - binary part"""
     pushop.stepsdone.add(b'phases')
     if pushop.outdatedphases:
-        updates = [[] for p in phases.allphases]
+        updates = {p: [] for p in phases.allphases}
         updates[0].extend(h.node() for h in pushop.outdatedphases)
         phasedata = phases.binaryencode(updates)
         bundler.newpart(b'phase-heads', data=phasedata)
@@ -2658,9 +2658,9 @@ 
                     headsbyphase[phases.public].add(node(r))
 
         # transform data in a format used by the encoding function
-        phasemapping = []
-        for phase in phases.allphases:
-            phasemapping.append(sorted(headsbyphase[phase]))
+        phasemapping = {
+            phase: sorted(headsbyphase[phase]) for phase in phases.allphases
+        }
 
         # generate the actual part
         phasedata = phases.binaryencode(phasemapping)
diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py
--- a/mercurial/bundle2.py
+++ b/mercurial/bundle2.py
@@ -2207,7 +2207,7 @@ 
         b'remote repository changed while pushing - please try again '
         b'(%s is %s expected %s)'
     )
-    for expectedphase, nodes in enumerate(phasetonodes):
+    for expectedphase, nodes in pycompat.iteritems(phasetonodes):
         for n in nodes:
             actualphase = phasecache.phase(unfi, cl.rev(n))
             if actualphase != expectedphase: