Patchwork D10344: sidedata: enable sidedata computers to optionally rewrite flags

login
register
mail settings
Submitter phabricator
Date April 9, 2021, 8:51 a.m.
Message ID <differential-rev-PHID-DREV-zntw76rkt6h7wthtm6r4-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48667/
State Superseded
Headers show

Comments

phabricator - April 9, 2021, 8:51 a.m.
Alphare created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Sidedata computers may want to influence the flags of the revision they touch.
  For example, the computer for changelog-based copytracing can add a flag to
  signify that this revision might affect copytracing, and conversely remove
  that flag when the information is no longer applicable.
  
  See inline documentation in `storageutil` for more details.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10344

AFFECTED FILES
  mercurial/cext/revlog.c
  mercurial/interfaces/repository.py
  mercurial/localrepo.py
  mercurial/metadata.py
  mercurial/pure/parsers.py
  mercurial/revlog.py
  mercurial/utils/storageutil.py
  tests/testlib/ext-sidedata-2.py
  tests/testlib/ext-sidedata-3.py
  tests/testlib/ext-sidedata-5.py

CHANGE DETAILS




To: Alphare, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/tests/testlib/ext-sidedata-5.py b/tests/testlib/ext-sidedata-5.py
--- a/tests/testlib/ext-sidedata-5.py
+++ b/tests/testlib/ext-sidedata-5.py
@@ -22,13 +22,15 @@ 
 
 from mercurial.revlogutils import sidedata as sidedatamod
 
+NO_FLAGS = (0, 0)
+
 
 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
     sidedata = sidedata.copy()
     if text is None:
         text = revlog.revision(rev)
     sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
@@ -37,7 +39,7 @@ 
         text = revlog.revision(rev)
     sha256 = hashlib.sha256(text).digest()
     sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def reposetup(ui, repo):
@@ -48,12 +50,14 @@ 
             sidedatamod.SD_TEST1,
             (sidedatamod.SD_TEST1,),
             compute_sidedata_1,
+            0,
         )
         repo.register_sidedata_computer(
             kind,
             sidedatamod.SD_TEST2,
             (sidedatamod.SD_TEST2,),
             compute_sidedata_2,
+            0,
         )
 
     # We don't register sidedata computers because we don't care within these
diff --git a/tests/testlib/ext-sidedata-3.py b/tests/testlib/ext-sidedata-3.py
--- a/tests/testlib/ext-sidedata-3.py
+++ b/tests/testlib/ext-sidedata-3.py
@@ -21,13 +21,15 @@ 
 
 from mercurial.revlogutils import sidedata as sidedatamod
 
+NO_FLAGS = (0, 0)
+
 
 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
     sidedata = sidedata.copy()
     if text is None:
         text = revlog.revision(rev)
     sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
@@ -36,7 +38,7 @@ 
         text = revlog.revision(rev)
     sha256 = hashlib.sha256(text).digest()
     sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def compute_sidedata_3(repo, revlog, rev, sidedata, text=None):
@@ -45,7 +47,7 @@ 
         text = revlog.revision(rev)
     sha384 = hashlib.sha384(text).digest()
     sidedata[sidedatamod.SD_TEST3] = struct.pack('>48s', sha384)
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def wrapaddrevision(
@@ -54,8 +56,8 @@ 
     if kwargs.get('sidedata') is None:
         kwargs['sidedata'] = {}
     sd = kwargs['sidedata']
-    sd = compute_sidedata_1(None, self, None, sd, text=text)
-    kwargs['sidedata'] = compute_sidedata_2(None, self, None, sd, text=text)
+    sd, flags = compute_sidedata_1(None, self, None, sd, text=text)
+    kwargs['sidedata'] = compute_sidedata_2(None, self, None, sd, text=text)[0]
     return orig(self, text, transaction, link, p1, p2, *args, **kwargs)
 
 
@@ -71,18 +73,21 @@ 
             sidedatamod.SD_TEST1,
             (sidedatamod.SD_TEST1,),
             compute_sidedata_1,
+            0,
         )
         repo.register_sidedata_computer(
             kind,
             sidedatamod.SD_TEST2,
             (sidedatamod.SD_TEST2,),
             compute_sidedata_2,
+            0,
         )
         repo.register_sidedata_computer(
             kind,
             sidedatamod.SD_TEST3,
             (sidedatamod.SD_TEST3,),
             compute_sidedata_3,
+            0,
         )
     repo.register_wanted_sidedata(sidedatamod.SD_TEST1)
     repo.register_wanted_sidedata(sidedatamod.SD_TEST2)
diff --git a/tests/testlib/ext-sidedata-2.py b/tests/testlib/ext-sidedata-2.py
--- a/tests/testlib/ext-sidedata-2.py
+++ b/tests/testlib/ext-sidedata-2.py
@@ -15,13 +15,15 @@ 
 
 from mercurial.revlogutils import sidedata as sidedatamod
 
+NO_FLAGS = (0, 0)  # (flags to add, flags to remove)
+
 
 def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
     sidedata = sidedata.copy()
     if text is None:
         text = revlog.revision(rev)
     sidedata[sidedatamod.SD_TEST1] = struct.pack('>I', len(text))
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
@@ -30,7 +32,7 @@ 
         text = revlog.revision(rev)
     sha256 = hashlib.sha256(text).digest()
     sidedata[sidedatamod.SD_TEST2] = struct.pack('>32s', sha256)
-    return sidedata
+    return sidedata, NO_FLAGS
 
 
 def reposetup(ui, repo):
@@ -41,10 +43,12 @@ 
             sidedatamod.SD_TEST1,
             (sidedatamod.SD_TEST1,),
             compute_sidedata_1,
+            0,
         )
         repo.register_sidedata_computer(
             kind,
             sidedatamod.SD_TEST2,
             (sidedatamod.SD_TEST2,),
             compute_sidedata_2,
+            0,
         )
diff --git a/mercurial/utils/storageutil.py b/mercurial/utils/storageutil.py
--- a/mercurial/utils/storageutil.py
+++ b/mercurial/utils/storageutil.py
@@ -363,12 +363,17 @@ 
         If not None, means that sidedata should be included.
         A dictionary of revlog type to tuples of `(repo, computers, removers)`:
             * `repo` is used as an argument for computers
-            * `computers` is a list of `(category, (keys, computer)` that
+            * `computers` is a list of `(category, (keys, computer, flags))` that
                compute the missing sidedata categories that were asked:
                * `category` is the sidedata category
                * `keys` are the sidedata keys to be affected
+               * `flags` is a bitmask (an integer) of flags to remove when
+                  removing the category.
                * `computer` is the function `(repo, store, rev, sidedata)` that
-                 returns a new sidedata dict.
+                 returns a tuple of
+                 `(new sidedata dict, (flags to add, flags to remove))`.
+                 For example, it will return `({}, (0, 1 << 15))` to return no
+                 sidedata, with no flags to add and one flag to remove.
             * `removers` will remove the keys corresponding to the categories
               that are present, but not needed.
         If both `computers` and `removers` are empty, sidedata are simply not
@@ -488,12 +493,13 @@ 
                 available.add(rev)
 
         serialized_sidedata = None
+        sidedata_flags = (0, 0)
         if sidedata_helpers:
-            sidedata = store.sidedata(rev)
-            sidedata = run_sidedata_helpers(
+            old_sidedata = store.sidedata(rev)
+            sidedata, sidedata_flags = run_sidedata_helpers(
                 store=store,
                 sidedata_helpers=sidedata_helpers,
-                sidedata=sidedata,
+                sidedata=old_sidedata,
                 rev=rev,
             )
             if sidedata:
@@ -503,6 +509,9 @@ 
         if serialized_sidedata:
             # Advertise that sidedata exists to the other side
             flags |= constants.REVIDX_SIDEDATA
+            # Computers and removers can return flags to add and/or remove
+            flags = flags | sidedata_flags[0] & ~sidedata_flags[1]
+
         yield resultcls(
             node=node,
             p1node=fnode(p1rev),
@@ -529,12 +538,17 @@ 
     """
     repo, sd_computers, sd_removers = sidedata_helpers
     kind = store.revlog_kind
-    for _keys, sd_computer in sd_computers.get(kind, []):
-        sidedata = sd_computer(repo, store, rev, sidedata)
-    for keys, _computer in sd_removers.get(kind, []):
+    flags_to_add = 0
+    flags_to_remove = 0
+    for _keys, sd_computer, _flags in sd_computers.get(kind, []):
+        sidedata, flags = sd_computer(repo, store, rev, sidedata)
+        flags_to_add |= flags[0]
+        flags_to_remove |= flags[1]
+    for keys, _computer, flags in sd_removers.get(kind, []):
         for key in keys:
             sidedata.pop(key, None)
-    return sidedata
+        flags_to_remove |= flags
+    return sidedata, (flags_to_add, flags_to_remove)
 
 
 def deltaiscensored(delta, baserev, baselenfn):
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -3216,7 +3216,7 @@ 
             current_offset = fp.tell()
             for rev in range(startrev, endrev + 1):
                 entry = self.index[rev]
-                new_sidedata = storageutil.run_sidedata_helpers(
+                new_sidedata, flags = storageutil.run_sidedata_helpers(
                     store=self,
                     sidedata_helpers=helpers,
                     sidedata={},
@@ -3232,7 +3232,11 @@ 
                     # revlog.
                     msg = b"Rewriting existing sidedata is not supported yet"
                     raise error.Abort(msg)
-                entry = entry[:8]
+
+                # Apply (potential) flags to add and to remove after running
+                # the sidedata helpers
+                new_offset_flags = entry[0] | flags[0] & ~flags[1]
+                entry = (new_offset_flags,) + entry[1:8]
                 entry += (current_offset, len(serialized_sidedata))
 
                 fp.write(serialized_sidedata)
@@ -3242,8 +3246,8 @@ 
         # rewrite the new index entries
         with self._indexfp(b'w+') as fp:
             fp.seek(startrev * self._io.size)
-            for i, entry in enumerate(new_entries):
+            for i, e in enumerate(new_entries):
                 rev = startrev + i
-                self.index.replace_sidedata_info(rev, entry[8], entry[9])
-                packed = self._io.packentry(entry, self.node, self.version, rev)
+                self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
+                packed = self._io.packentry(e, self.node, self.version, rev)
                 fp.write(packed)
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -247,7 +247,9 @@ 
     index_size = revlog_constants.INDEX_ENTRY_V2.size
     null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)
 
-    def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
+    def replace_sidedata_info(
+        self, i, sidedata_offset, sidedata_length, offset_flags
+    ):
         """
         Replace an existing index entry's sidedata offset and length with new
         ones.
@@ -262,7 +264,8 @@ 
         if i >= self._lgt:
             packed = _pack(sidedata_format, sidedata_offset, sidedata_length)
             old = self._extra[i - self._lgt]
-            new = old[:64] + packed + old[64 + packed_size :]
+            offset_flags = struct.pack(b">Q", offset_flags)
+            new = offset_flags + old[8:64] + packed + old[64 + packed_size :]
             self._extra[i - self._lgt] = new
         else:
             msg = b"cannot rewrite entries outside of this transaction"
diff --git a/mercurial/metadata.py b/mercurial/metadata.py
--- a/mercurial/metadata.py
+++ b/mercurial/metadata.py
@@ -822,7 +822,9 @@ 
 
 
 def copies_sidedata_computer(repo, revlog, rev, existing_sidedata):
-    return _getsidedata(repo, rev)[0]
+    sidedata, has_copies_info = _getsidedata(repo, rev)
+    flags_to_add = sidedataflag.REVIDX_HASCOPIESINFO if has_copies_info else 0
+    return sidedata, (flags_to_add, 0)
 
 
 def set_sidedata_spec_for_repo(repo):
@@ -833,6 +835,7 @@ 
         sidedatamod.SD_FILES,
         (sidedatamod.SD_FILES,),
         copies_sidedata_computer,
+        sidedataflag.REVIDX_HASCOPIESINFO,
     )
 
 
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -3365,7 +3365,7 @@ 
             return
         self._wanted_sidedata.add(pycompat.bytestr(category))
 
-    def register_sidedata_computer(self, kind, category, keys, computer):
+    def register_sidedata_computer(self, kind, category, keys, computer, flags):
         if kind not in (b"changelog", b"manifest", b"filelog"):
             msg = _(b"unexpected revlog kind '%s'.")
             raise error.ProgrammingError(msg % kind)
@@ -3376,7 +3376,7 @@ 
             )
             raise error.ProgrammingError(msg % category)
         self._sidedata_computers.setdefault(kind, {})
-        self._sidedata_computers[kind][category] = (keys, computer)
+        self._sidedata_computers[kind][category] = (keys, computer, flags)
 
 
 # used to avoid circular references so destructors work
diff --git a/mercurial/interfaces/repository.py b/mercurial/interfaces/repository.py
--- a/mercurial/interfaces/repository.py
+++ b/mercurial/interfaces/repository.py
@@ -1851,7 +1851,7 @@ 
     def savecommitmessage(text):
         pass
 
-    def register_sidedata_computer(kind, category, keys, computer):
+    def register_sidedata_computer(kind, category, keys, computer, flags):
         pass
 
     def register_wanted_sidedata(category):
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -462,14 +462,14 @@ 
    inside the transaction that creates the given revision. */
 static PyObject *index_replace_sidedata_info(indexObject *self, PyObject *args)
 {
-	uint64_t sidedata_offset;
+	uint64_t offset_flags, sidedata_offset;
 	int rev;
 	Py_ssize_t sidedata_comp_len;
 	char *data;
 #if LONG_MAX == 0x7fffffffL
-	const char *const sidedata_format = PY23("nKi", "nKi");
+	const char *const sidedata_format = PY23("nKiK", "nKiK");
 #else
-	const char *const sidedata_format = PY23("nki", "nki");
+	const char *const sidedata_format = PY23("nkik", "nkik");
 #endif
 
 	if (self->hdrsize == v1_hdrsize || self->inlined) {
@@ -484,7 +484,7 @@ 
 	}
 
 	if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
-	                      &sidedata_comp_len))
+	                      &sidedata_comp_len, &offset_flags))
 		return NULL;
 
 	if (rev < 0 || rev >= index_length(self)) {
@@ -501,6 +501,7 @@ 
 	/* Find the newly added node, offset from the "already on-disk" length
 	 */
 	data = self->added + self->hdrsize * (rev - self->length);
+	putbe64(offset_flags, data);
 	putbe64(sidedata_offset, data + 64);
 	putbe32(sidedata_comp_len, data + 72);