Patchwork D10359: sidedata: replace sidedata upgrade mechanism with the new one

login
register
mail settings
Submitter phabricator
Date April 10, 2021, 7:16 p.m.
Message ID <differential-rev-PHID-DREV-rbvnwxwtilffwgo3drxd-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48676/
State Superseded
Headers show

Comments

phabricator - April 10, 2021, 7:16 p.m.
Alphare created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Note: this is split into a separate change (like some other patches in this
  series) because it's not easy to have all patches work 100% and this seemed
  easier for reviewers.
  
  When cloning or upgrading a repo, we may need to compute (or remove) sidedata.
  This is the same mechanism that is used in exchange, so we re-use the new
  system to simplify the code and fix the remaining issues (correctly dropping
  flags and handling partial removal, etc.).
  
  This also highlighted an issue with `test-copies-in-changeset.t` that kept
  sidedata categories that are not relevant anymore. They should probably be
  dropped entirely, but that would be for another patch.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10359

AFFECTED FILES
  mercurial/metadata.py
  mercurial/revlog.py
  mercurial/upgrade_utils/engine.py
  tests/test-copies-in-changeset.t
  tests/testlib/ext-sidedata.py

CHANGE DETAILS




To: Alphare, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/tests/testlib/ext-sidedata.py b/tests/testlib/ext-sidedata.py
--- a/tests/testlib/ext-sidedata.py
+++ b/tests/testlib/ext-sidedata.py
@@ -15,6 +15,7 @@ 
     nullrev,
 )
 from mercurial import (
+    changegroup,
     extensions,
     requirements,
     revlog,
@@ -57,13 +58,15 @@ 
     return text, sd
 
 
-def wrapgetsidedatacompanion(orig, srcrepo, dstrepo):
-    sidedatacompanion = orig(srcrepo, dstrepo)
+def wrapget_sidedata_helpers(orig, srcrepo, dstrepo):
+    repo, computers, removers = orig(srcrepo, dstrepo)
+    assert not computers and not removers  # deal with composition later
     addedreqs = dstrepo.requirements - srcrepo.requirements
+
     if requirements.SIDEDATA_REQUIREMENT in addedreqs:
-        assert sidedatacompanion is None  # deal with composition later
 
-        def sidedatacompanion(revlog, rev):
+        def computer(repo, revlog, rev, old_sidedata):
+            assert not old_sidedata  # not supported yet
             update = {}
             revlog.sidedatanocheck = True
             try:
@@ -76,16 +79,25 @@ 
             # and sha2 hashes
             sha256 = hashlib.sha256(text).digest()
             update[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
-            return False, (), update, 0, 0
+            return update, (0, 0)
 
-    return sidedatacompanion
+        srcrepo.register_sidedata_computer(
+            b"changelog",
+            b"whatever",
+            (sidedata.SD_TEST1, sidedata.SD_TEST2),
+            computer,
+            0,
+        )
+        dstrepo.register_wanted_sidedata(b"whatever")
+
+    return changegroup.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
 
 
 def extsetup(ui):
     extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
     extensions.wrapfunction(revlog.revlog, '_revisiondata', wrap_revisiondata)
     extensions.wrapfunction(
-        upgrade_engine, 'getsidedatacompanion', wrapgetsidedatacompanion
+        upgrade_engine, 'get_sidedata_helpers', wrapget_sidedata_helpers
     )
 
 
diff --git a/tests/test-copies-in-changeset.t b/tests/test-copies-in-changeset.t
--- a/tests/test-copies-in-changeset.t
+++ b/tests/test-copies-in-changeset.t
@@ -417,7 +417,7 @@ 
 Test upgrading/downgrading to sidedata storage
 ==============================================
 
-downgrading (keeping some sidedata)
+downgrading
 
   $ hg debugformat -v
   format-variant     repo config default
@@ -461,11 +461,7 @@ 
   compression:        zstd   zstd    zstd (zstd !)
   compression-level:  default default default
   $ hg debugsidedata -c -- 0
-  1 sidedata entries
-   entry-0014 size 14
   $ hg debugsidedata -c -- 1
-  1 sidedata entries
-   entry-0014 size 14
   $ hg debugsidedata -m -- 0
 
 upgrading
diff --git a/mercurial/upgrade_utils/engine.py b/mercurial/upgrade_utils/engine.py
--- a/mercurial/upgrade_utils/engine.py
+++ b/mercurial/upgrade_utils/engine.py
@@ -12,6 +12,7 @@ 
 from ..i18n import _
 from ..pycompat import getattr
 from .. import (
+    changegroup,
     changelog,
     error,
     filelog,
@@ -19,12 +20,30 @@ 
     metadata,
     pycompat,
     requirements,
-    revlog,
     scmutil,
     util,
     vfs as vfsmod,
 )
-from ..revlogutils import nodemap
+from ..revlogutils import (
+    flagutil,
+    nodemap,
+    sidedata as sidedatamod,
+)
+
+
+def get_sidedata_helpers(srcrepo, dstrepo):
+    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
+    sequential = pycompat.iswindows or not use_w
+    if not sequential:
+        srcrepo.register_sidedata_computer(
+            b"changelog",
+            sidedatamod.SD_FILES,
+            (sidedatamod.SD_FILES,),
+            metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
+            flagutil.REVIDX_HASCOPIESINFO,
+            replace=True,
+        )
+    return changegroup.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
 
 
 def _revlogfrompath(repo, path):
@@ -88,25 +107,6 @@ 
 )
 
 
-def getsidedatacompanion(srcrepo, dstrepo):
-    sidedatacompanion = None
-    removedreqs = srcrepo.requirements - dstrepo.requirements
-    addedreqs = dstrepo.requirements - srcrepo.requirements
-    if requirements.SIDEDATA_REQUIREMENT in removedreqs:
-
-        def sidedatacompanion(rl, rev):
-            rl = getattr(rl, '_revlog', rl)
-            if rl.flags(rev) & revlog.REVIDX_SIDEDATA:
-                return True, (), {}, 0, 0
-            return False, (), {}, 0, 0
-
-    elif requirements.COPIESSDC_REQUIREMENT in addedreqs:
-        sidedatacompanion = metadata.getsidedataadder(srcrepo, dstrepo)
-    elif requirements.COPIESSDC_REQUIREMENT in removedreqs:
-        sidedatacompanion = metadata.getsidedataremover(srcrepo, dstrepo)
-    return sidedatacompanion
-
-
 def matchrevlog(revlogfilter, entry):
     """check if a revlog is selected for cloning.
 
@@ -128,7 +128,7 @@ 
     old_revlog,
     unencoded,
     upgrade_op,
-    sidedatacompanion,
+    sidedata_helpers,
     oncopiedrevision,
 ):
     """ returns the new revlog object created"""
@@ -144,7 +144,7 @@ 
             addrevisioncb=oncopiedrevision,
             deltareuse=upgrade_op.delta_reuse_mode,
             forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
-            sidedatacompanion=sidedatacompanion,
+            sidedata_helpers=sidedata_helpers,
         )
     else:
         msg = _(b'blindly copying %s containing %i revisions\n')
@@ -254,7 +254,7 @@ 
     def oncopiedrevision(rl, rev, node):
         progress.increment()
 
-    sidedatacompanion = getsidedatacompanion(srcrepo, dstrepo)
+    sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
 
     # Migrating filelogs
     ui.status(
@@ -278,7 +278,7 @@ 
             oldrl,
             unencoded,
             upgrade_op,
-            sidedatacompanion,
+            sidedata_helpers,
             oncopiedrevision,
         )
         info = newrl.storageinfo(storedsize=True)
@@ -317,7 +317,7 @@ 
             oldrl,
             unencoded,
             upgrade_op,
-            sidedatacompanion,
+            sidedata_helpers,
             oncopiedrevision,
         )
         info = newrl.storageinfo(storedsize=True)
@@ -355,7 +355,7 @@ 
             oldrl,
             unencoded,
             upgrade_op,
-            sidedatacompanion,
+            sidedata_helpers,
             oncopiedrevision,
         )
         info = newrl.storageinfo(storedsize=True)
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -91,6 +91,7 @@ 
 
 # blanked usage of all the name to prevent pyflakes constraints
 # We need these name available in the module for extensions.
+
 REVLOGV0
 REVLOGV1
 REVLOGV2
@@ -2117,7 +2118,7 @@ 
                 _(b"attempted to add linkrev -1 to %s") % self.indexfile
             )
 
-        if sidedata is None:
+        if not sidedata:
             sidedata = {}
         elif not self.hassidedata:
             raise error.ProgrammingError(
@@ -2774,7 +2775,7 @@ 
         addrevisioncb=None,
         deltareuse=DELTAREUSESAMEREVS,
         forcedeltabothparents=None,
-        sidedatacompanion=None,
+        sidedata_helpers=None,
     ):
         """Copy this revlog to another, possibly with format changes.
 
@@ -2817,21 +2818,7 @@ 
         argument controls whether to force compute deltas against both parents
         for merges. By default, the current default is used.
 
-        If not None, the `sidedatacompanion` is callable that accept two
-        arguments:
-
-            (srcrevlog, rev)
-
-        and return a quintet that control changes to sidedata content from the
-        old revision to the new clone result:
-
-            (dropall, filterout, update, new_flags, dropped_flags)
-
-        * if `dropall` is True, all sidedata should be dropped
-        * `filterout` is a set of sidedata keys that should be dropped
-        * `update` is a mapping of additionnal/new key -> value
-        * new_flags is a bitfields of new flags that the revision should get
-        * dropped_flags is a bitfields of new flags that the revision shoudl not longer have
+        See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
         """
         if deltareuse not in self.DELTAREUSEALL:
             raise ValueError(
@@ -2871,7 +2858,7 @@ 
                 addrevisioncb,
                 deltareuse,
                 forcedeltabothparents,
-                sidedatacompanion,
+                sidedata_helpers,
             )
 
         finally:
@@ -2886,7 +2873,7 @@ 
         addrevisioncb,
         deltareuse,
         forcedeltabothparents,
-        sidedatacompanion,
+        sidedata_helpers,
     ):
         """perform the core duty of `revlog.clone` after parameter processing"""
         deltacomputer = deltautil.deltacomputer(destrevlog)
@@ -2902,31 +2889,18 @@ 
             p2 = index[entry[6]][7]
             node = entry[7]
 
-            sidedataactions = (False, [], {}, 0, 0)
-            if sidedatacompanion is not None:
-                sidedataactions = sidedatacompanion(self, rev)
-
             # (Possibly) reuse the delta from the revlog if allowed and
             # the revlog chunk is a delta.
             cachedelta = None
             rawtext = None
-            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
-                dropall = sidedataactions[0]
-                filterout = sidedataactions[1]
-                update = sidedataactions[2]
-                new_flags = sidedataactions[3]
-                dropped_flags = sidedataactions[4]
+            if deltareuse == self.DELTAREUSEFULLADD:
                 text, sidedata = self._revisiondata(rev)
-                if dropall:
-                    sidedata = {}
-                for key in filterout:
-                    sidedata.pop(key, None)
-                sidedata.update(update)
-                if not sidedata:
-                    sidedata = None
-
-                flags |= new_flags
-                flags &= ~dropped_flags
+
+                if sidedata_helpers is not None:
+                    (sidedata, new_flags) = storageutil.run_sidedata_helpers(
+                        self, sidedata_helpers, sidedata, rev
+                    )
+                    flags = flags | new_flags[0] & ~new_flags[1]
 
                 destrevlog.addrevision(
                     text,
@@ -2946,8 +2920,17 @@ 
                     if dp != nullrev:
                         cachedelta = (dp, bytes(self._chunk(rev)))
 
+                sidedata = None
                 if not cachedelta:
-                    rawtext = self.rawdata(rev)
+                    rawtext, sidedata = self._revisiondata(rev)
+                if sidedata is None:
+                    sidedata = self.sidedata(rev)
+
+                if sidedata_helpers is not None:
+                    (sidedata, new_flags) = storageutil.run_sidedata_helpers(
+                        self, sidedata_helpers, sidedata, rev
+                    )
+                    flags = flags | new_flags[0] & ~new_flags[1]
 
                 ifh = destrevlog.opener(
                     destrevlog.indexfile, b'a+', checkambig=False
@@ -2968,6 +2951,7 @@ 
                         ifh,
                         dfh,
                         deltacomputer=deltacomputer,
+                        sidedata=sidedata,
                     )
                 finally:
                     if dfh:
diff --git a/mercurial/metadata.py b/mercurial/metadata.py
--- a/mercurial/metadata.py
+++ b/mercurial/metadata.py
@@ -17,7 +17,6 @@ 
 )
 from . import (
     error,
-    pycompat,
     requirements as requirementsmod,
     util,
 )
@@ -839,14 +838,6 @@ 
     )
 
 
-def getsidedataadder(srcrepo, destrepo):
-    use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
-    if pycompat.iswindows or not use_w:
-        return _get_simple_sidedata_adder(srcrepo, destrepo)
-    else:
-        return _get_worker_sidedata_adder(srcrepo, destrepo)
-
-
 def _sidedata_worker(srcrepo, revs_queue, sidedata_queue, tokens):
     """The function used by worker precomputing sidedata
 
@@ -913,57 +904,21 @@ 
     # received, when shelve 43 for later use.
     staging = {}
 
-    def sidedata_companion(revlog, rev):
-        data = {}, False
-        if util.safehasattr(revlog, b'filteredrevs'):  # this is a changelog
-            # Is the data previously shelved ?
-            data = staging.pop(rev, None)
-            if data is None:
-                # look at the queued result until we find the one we are lookig
-                # for (shelve the other ones)
+    def sidedata_companion(repo, revlog, rev, old_sidedata):
+        # Is the data previously shelved ?
+        data = staging.pop(rev, None)
+        if data is None:
+            # look at the queued result until we find the one we are lookig
+            # for (shelve the other ones)
+            r, data = sidedataq.get()
+            while r != rev:
+                staging[r] = data
                 r, data = sidedataq.get()
-                while r != rev:
-                    staging[r] = data
-                    r, data = sidedataq.get()
-            tokens.release()
+        tokens.release()
         sidedata, has_copies_info = data
         new_flag = 0
         if has_copies_info:
             new_flag = sidedataflag.REVIDX_HASCOPIESINFO
-        return False, (), sidedata, new_flag, 0
+        return sidedata, (new_flag, 0)
 
     return sidedata_companion
-
-
-def _get_simple_sidedata_adder(srcrepo, destrepo):
-    """The simple version of the sidedata computation
-
-    It just compute it in the same thread on request"""
-
-    def sidedatacompanion(revlog, rev):
-        sidedata, has_copies_info = {}, False
-        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
-            sidedata, has_copies_info = _getsidedata(srcrepo, rev)
-        new_flag = 0
-        if has_copies_info:
-            new_flag = sidedataflag.REVIDX_HASCOPIESINFO
-
-        return False, (), sidedata, new_flag, 0
-
-    return sidedatacompanion
-
-
-def getsidedataremover(srcrepo, destrepo):
-    def sidedatacompanion(revlog, rev):
-        f = ()
-        if util.safehasattr(revlog, 'filteredrevs'):  # this is a changelog
-            if revlog.flags(rev) & sidedataflag.REVIDX_SIDEDATA:
-                f = (
-                    sidedatamod.SD_P1COPIES,
-                    sidedatamod.SD_P2COPIES,
-                    sidedatamod.SD_FILESADDED,
-                    sidedatamod.SD_FILESREMOVED,
-                )
-        return False, f, {}, 0, sidedataflag.REVIDX_HASCOPIESINFO
-
-    return sidedatacompanion