Patchwork D9231: revlog: extend addgroup() with callback for duplicates

login
register
mail settings
Submitter phabricator
Date Oct. 20, 2020, 3:34 p.m.
Message ID <differential-rev-PHID-DREV-rmbplg6pa26x5w3ovdjn-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/47499/
State Superseded
Headers show

Comments

phabricator - Oct. 20, 2020, 3:34 p.m.
joerg.sonnenberger created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  The addgroup() interface currently doesn't allow the caller to keep
  track of duplicated nodes except by looking at the returned node list.
  Add an optional second callback for this purpose and change the return
  type to a boolean. This allows follow-up changes to use more efficient
  storage for the node list in places that are memory-sensitive.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D9231

AFFECTED FILES
  hgext/sqlitestore.py
  mercurial/changegroup.py
  mercurial/exchangev2.py
  mercurial/filelog.py
  mercurial/interfaces/repository.py
  mercurial/manifest.py
  mercurial/revlog.py
  mercurial/testing/storage.py
  mercurial/unionrepo.py
  tests/simplestorerepo.py

CHANGE DETAILS




To: joerg.sonnenberger, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/tests/simplestorerepo.py b/tests/simplestorerepo.py
--- a/tests/simplestorerepo.py
+++ b/tests/simplestorerepo.py
@@ -532,6 +532,7 @@ 
         linkmapper,
         transaction,
         addrevisioncb=None,
+        duplicaterevisioncb=None,
         maybemissingparents=False,
     ):
         if maybemissingparents:
@@ -539,7 +540,7 @@ 
                 _('simple store does not support missing parents ' 'write mode')
             )
 
-        nodes = []
+        empty = True
 
         transaction.addbackup(self._indexpath)
 
@@ -547,9 +548,10 @@ 
             linkrev = linkmapper(linknode)
             flags = flags or revlog.REVIDX_DEFAULT_FLAGS
 
-            nodes.append(node)
-
             if node in self._indexbynode:
+                if duplicaterevisioncb:
+                    duplicaterevisioncb(self, node)
+                empty = False
                 continue
 
             # Need to resolve the fulltext from the delta base.
@@ -564,7 +566,8 @@ 
 
             if addrevisioncb:
                 addrevisioncb(self, node)
-        return nodes
+            empty = False
+        return not empty
 
     def _headrevs(self):
         # Assume all revisions are heads by default.
diff --git a/mercurial/unionrepo.py b/mercurial/unionrepo.py
--- a/mercurial/unionrepo.py
+++ b/mercurial/unionrepo.py
@@ -129,6 +129,7 @@ 
         linkmapper,
         transaction,
         addrevisioncb=None,
+        duplicaterevisioncb=None,
         maybemissingparents=False,
     ):
         raise NotImplementedError
diff --git a/mercurial/testing/storage.py b/mercurial/testing/storage.py
--- a/mercurial/testing/storage.py
+++ b/mercurial/testing/storage.py
@@ -1117,7 +1117,22 @@ 
             return 0
 
         with self._maketransactionfn() as tr:
-            nodes = f.addgroup([], None, tr, addrevisioncb=cb)
+            nodes = []
+
+            def onchangeset(cl, node):
+                nodes.append(node)
+                cb(cl, node)
+
+            def ondupchangeset(cl, node):
+                nodes.append(node)
+
+            f.addgroup(
+                [],
+                None,
+                tr,
+                addrevisioncb=onchangeset,
+                duplicaterevisioncb=ondupchangeset,
+            )
 
         self.assertEqual(nodes, [])
         self.assertEqual(callbackargs, [])
@@ -1136,7 +1151,22 @@ 
         ]
 
         with self._maketransactionfn() as tr:
-            nodes = f.addgroup(deltas, linkmapper, tr, addrevisioncb=cb)
+            nodes = []
+
+            def onchangeset(cl, node):
+                nodes.append(node)
+                cb(cl, node)
+
+            def ondupchangeset(cl, node):
+                nodes.append(node)
+
+            f.addgroup(
+                deltas,
+                linkmapper,
+                tr,
+                addrevisioncb=onchangeset,
+                duplicaterevisioncb=ondupchangeset,
+            )
 
         self.assertEqual(
             nodes,
@@ -1175,7 +1205,19 @@ 
             deltas.append((nodes[i], nullid, nullid, nullid, nullid, delta, 0))
 
         with self._maketransactionfn() as tr:
-            self.assertEqual(f.addgroup(deltas, lambda x: 0, tr), nodes)
+            newnodes = []
+
+            def onchangeset(cl, node):
+                newnodes.append(node)
+
+            f.addgroup(
+                deltas,
+                lambda x: 0,
+                tr,
+                addrevisioncb=onchangeset,
+                duplicaterevisioncb=onchangeset,
+            )
+            self.assertEqual(newnodes, nodes)
 
         self.assertEqual(len(f), len(deltas))
         self.assertEqual(list(f.revs()), [0, 1, 2])
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -2368,7 +2368,14 @@ 
             self._enforceinlinesize(transaction, ifh)
         nodemaputil.setup_persistent_nodemap(transaction, self)
 
-    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
+    def addgroup(
+        self,
+        deltas,
+        linkmapper,
+        transaction,
+        addrevisioncb=None,
+        duplicaterevisioncb=None,
+    ):
         """
         add a delta group
 
@@ -2383,8 +2390,6 @@ 
         if self._writinghandles:
             raise error.ProgrammingError(b'cannot nest addgroup() calls')
 
-        nodes = []
-
         r = len(self)
         end = 0
         if r:
@@ -2405,6 +2410,7 @@ 
             ifh.flush()
 
         self._writinghandles = (ifh, dfh)
+        empty = True
 
         try:
             deltacomputer = deltautil.deltacomputer(self)
@@ -2414,11 +2420,12 @@ 
                 link = linkmapper(linknode)
                 flags = flags or REVIDX_DEFAULT_FLAGS
 
-                nodes.append(node)
-
                 if self.index.has_node(node):
+                    # this can happen if two branches make the same change
                     self._nodeduplicatecallback(transaction, node)
-                    # this can happen if two branches make the same change
+                    if duplicaterevisioncb:
+                        duplicaterevisioncb(self, node)
+                    empty = False
                     continue
 
                 for p in (p1, p2):
@@ -2472,6 +2479,7 @@ 
 
                 if addrevisioncb:
                     addrevisioncb(self, node)
+                empty = False
 
                 if not dfh and not self._inline:
                     # addrevision switched from inline to conventional
@@ -2486,8 +2494,7 @@ 
             if dfh:
                 dfh.close()
             ifh.close()
-
-        return nodes
+        return not empty
 
     def iscensored(self, rev):
         """Check if a file revision is censored."""
diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -1832,9 +1832,20 @@ 
             deltamode=deltamode,
         )
 
-    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
+    def addgroup(
+        self,
+        deltas,
+        linkmapper,
+        transaction,
+        addrevisioncb=None,
+        duplicaterevisioncb=None,
+    ):
         return self._revlog.addgroup(
-            deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
+            deltas,
+            linkmapper,
+            transaction,
+            addrevisioncb=addrevisioncb,
+            duplicaterevisioncb=duplicaterevisioncb,
         )
 
     def rawsize(self, rev):
diff --git a/mercurial/interfaces/repository.py b/mercurial/interfaces/repository.py
--- a/mercurial/interfaces/repository.py
+++ b/mercurial/interfaces/repository.py
@@ -756,6 +756,7 @@ 
         linkmapper,
         transaction,
         addrevisioncb=None,
+        duplicaterevisioncb=None,
         maybemissingparents=False,
     ):
         """Process a series of deltas for storage.
@@ -1247,7 +1248,13 @@ 
         See the documentation for ``ifiledata`` for more.
         """
 
-    def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
+    def addgroup(
+        deltas,
+        linkmapper,
+        transaction,
+        addrevisioncb=None,
+        duplicaterevisioncb=None,
+    ):
         """Process a series of deltas for storage.
 
         See the documentation in ``ifilemutation`` for more.
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -139,6 +139,7 @@ 
         linkmapper,
         transaction,
         addrevisioncb=None,
+        duplicaterevisioncb=None,
         maybemissingparents=False,
     ):
         if maybemissingparents:
@@ -150,7 +151,11 @@ 
             )
 
         return self._revlog.addgroup(
-            deltas, linkmapper, transaction, addrevisioncb=addrevisioncb
+            deltas,
+            linkmapper,
+            transaction,
+            addrevisioncb=addrevisioncb,
+            duplicaterevisioncb=duplicaterevisioncb,
         )
 
     def getstrippoint(self, minlink):
diff --git a/mercurial/exchangev2.py b/mercurial/exchangev2.py
--- a/mercurial/exchangev2.py
+++ b/mercurial/exchangev2.py
@@ -343,16 +343,21 @@ 
     )
 
     manifestnodes = {}
+    added = []
 
     def linkrev(node):
         repo.ui.debug(b'add changeset %s\n' % short(node))
         # Linkrev for changelog is always self.
         return len(cl)
 
+    def ondupchangeset(cl, node):
+        added.append(node)
+
     def onchangeset(cl, node):
         progress.increment()
 
         revision = cl.changelogrevision(node)
+        added.append(node)
 
         # We need to preserve the mapping of changelog revision to node
         # so we can set the linkrev accordingly when manifests are added.
@@ -403,8 +408,12 @@ 
                 0,
             )
 
-    added = cl.addgroup(
-        iterrevisions(), linkrev, weakref.proxy(tr), addrevisioncb=onchangeset
+    cl.addgroup(
+        iterrevisions(),
+        linkrev,
+        weakref.proxy(tr),
+        addrevisioncb=onchangeset,
+        duplicaterevisioncb=ondupchangeset,
     )
 
     progress.complete()
@@ -516,12 +525,15 @@ 
             # Chomp off header object.
             next(objs)
 
-            added.extend(
-                rootmanifest.addgroup(
-                    iterrevisions(objs, progress),
-                    linkrevs.__getitem__,
-                    weakref.proxy(tr),
-                )
+            def onchangeset(cl, node):
+                added.append(node)
+
+            rootmanifest.addgroup(
+                iterrevisions(objs, progress),
+                linkrevs.__getitem__,
+                weakref.proxy(tr),
+                addrevisioncb=onchangeset,
+                duplicaterevisioncb=onchangeset,
             )
 
     progress.complete()
diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -316,20 +316,29 @@ 
             self.callback = progress.increment
 
             efilesset = set()
+            cgnodes = []
 
             def onchangelog(cl, node):
                 efilesset.update(cl.readfiles(node))
+                cgnodes.append(node)
+
+            def ondupchangelog(cl, node):
+                cgnodes.append(node)
 
             self.changelogheader()
             deltas = self.deltaiter()
-            cgnodes = cl.addgroup(deltas, csmap, trp, addrevisioncb=onchangelog)
-            efiles = len(efilesset)
-
-            if not cgnodes:
+            if not cl.addgroup(
+                deltas,
+                csmap,
+                trp,
+                addrevisioncb=onchangelog,
+                duplicaterevisioncb=ondupchangelog,
+            ):
                 repo.ui.develwarn(
                     b'applied empty changelog from changegroup',
                     config=b'warn-empty-changegroup',
                 )
+            efiles = len(efilesset)
             clend = len(cl)
             changesets = clend - clstart
             progress.complete()
diff --git a/hgext/sqlitestore.py b/hgext/sqlitestore.py
--- a/hgext/sqlitestore.py
+++ b/hgext/sqlitestore.py
@@ -674,9 +674,10 @@ 
         linkmapper,
         transaction,
         addrevisioncb=None,
+        duplicaterevisioncb=None,
         maybemissingparents=False,
     ):
-        nodes = []
+        empty = True
 
         for node, p1, p2, linknode, deltabase, delta, wireflags in deltas:
             storeflags = 0
@@ -715,8 +716,6 @@ 
 
             linkrev = linkmapper(linknode)
 
-            nodes.append(node)
-
             if node in self._revisions:
                 # Possibly reset parents to make them proper.
                 entry = self._revisions[node]
@@ -741,6 +740,9 @@ 
                         (self._nodetorev[p1], entry.flags, entry.rid),
                     )
 
+                if duplicaterevisioncb:
+                    duplicaterevisioncb(self, node)
+                empty = False
                 continue
 
             if deltabase == nullid:
@@ -763,8 +765,9 @@ 
 
             if addrevisioncb:
                 addrevisioncb(self, node)
+            empty = False
 
-        return nodes
+        return not empty
 
     def censorrevision(self, tr, censornode, tombstone=b''):
         tombstone = storageutil.packmeta({b'censored': tombstone}, b'')