Patchwork [1,of,6] changegroup: avoid iterating the whole manifest

login
register
mail settings
Submitter Augie Fackler
Date Dec. 4, 2015, 7:20 p.m.
Message ID <bfdd624c667b6f72f206.1449256836@arthedain.pit.corp.google.com>
Download mbox | patch
Permalink /patch/11808/
State Superseded
Headers show

Comments

Augie Fackler - Dec. 4, 2015, 7:20 p.m.
# HG changeset patch
# User Augie Fackler <augie@google.com>
# Date 1449243298 18000
#      Fri Dec 04 10:34:58 2015 -0500
# Node ID bfdd624c667b6f72f2069a3537becad05e162c08
# Parent  30a20167ae29e1874163b59a489de0cb1d859565
# EXP-Topic cg3
changegroup: avoid iterating the whole manifest

The old code gathered the list of all files that changed anywhere in
history and then gathered changed file nodes by walking the entirety
of each manifest to be sent in order to gather changed file
nodes. That's going to be unfortunate for narrowhg, and it's probably
already inefficient for medium-to-large repositories.
Augie Fackler - Dec. 4, 2015, 7:35 p.m.
Hey folks, sorry about this, drop this round and look for a
properly-flagged v3 shortly. I missed a check-code violation.

On Fri, Dec 4, 2015 at 2:20 PM, Augie Fackler <raf@durin42.com> wrote:
> # HG changeset patch
> # User Augie Fackler <augie@google.com>
> # Date 1449243298 18000
> #      Fri Dec 04 10:34:58 2015 -0500
> # Node ID bfdd624c667b6f72f2069a3537becad05e162c08
> # Parent  30a20167ae29e1874163b59a489de0cb1d859565
> # EXP-Topic cg3
> changegroup: avoid iterating the whole manifest
>
> The old code gathered the list of all files that changed anywhere in
> history and then gathered changed file nodes by walking the entirety
> of each manifest to be sent in order to gather changed file
> nodes. That's going to be unfortunate for narrowhg, and it's probably
> already inefficient for medium-to-large repositories.
>
> diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
> --- a/mercurial/changegroup.py
> +++ b/mercurial/changegroup.py
> @@ -613,7 +613,8 @@ class cg1packer(object):
>          clrevorder = {}
>          mfs = {} # needed manifests
>          fnodes = {} # needed file nodes
> -        changedfiles = set()
> +        # maps manifest node id -> set(changed files)
> +        mfchangedfiles = {}
>
>          # Callback for the changelog, used to collect changed files and manifest
>          # nodes.
> @@ -621,9 +622,12 @@ class cg1packer(object):
>          def lookupcl(x):
>              c = cl.read(x)
>              clrevorder[x] = len(clrevorder)
> -            changedfiles.update(c[3])
> +            n = c[0]
>              # record the first changeset introducing this manifest version
> -            mfs.setdefault(c[0], x)
> +            mfs.setdefault(n, x)
> +            # Record a complete list of potentially-changed files in
> +            # this manifest.
> +            mfchangedfiles.setdefault(n, set()).update(c[3])
>              return x
>
>          self._verbosenote(_('uncompressed size of bundle content:\n'))
> @@ -668,8 +672,12 @@ class cg1packer(object):
>              clnode = mfs[x]
>              if not fastpathlinkrev:
>                  mdata = ml.readfast(x)
> -                for f, n in mdata.iteritems():
> -                    if f in changedfiles:
> +                for f in mfchangedfiles[x]:
> +                    if True:
> +                        try:
> +                            n = mdata[f]
> +                        except KeyError:
> +                            continue
>                          # record the first changeset introducing this filelog
>                          # version
>                          fclnodes = fnodes.setdefault(f, {})
> @@ -696,6 +704,9 @@ class cg1packer(object):
>                  return dict(genfilenodes())
>              return fnodes.get(fname, {})
>
> +        changedfiles = set()
> +        for x in mfchangedfiles.itervalues():
> +            changedfiles.update(x)
>          for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
>                                          source):
>              yield chunk
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> https://selenic.com/mailman/listinfo/mercurial-devel

Patch

diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -613,7 +613,8 @@  class cg1packer(object):
         clrevorder = {}
         mfs = {} # needed manifests
         fnodes = {} # needed file nodes
-        changedfiles = set()
+        # maps manifest node id -> set(changed files)
+        mfchangedfiles = {}
 
         # Callback for the changelog, used to collect changed files and manifest
         # nodes.
@@ -621,9 +622,12 @@  class cg1packer(object):
         def lookupcl(x):
             c = cl.read(x)
             clrevorder[x] = len(clrevorder)
-            changedfiles.update(c[3])
+            n = c[0]
             # record the first changeset introducing this manifest version
-            mfs.setdefault(c[0], x)
+            mfs.setdefault(n, x)
+            # Record a complete list of potentially-changed files in
+            # this manifest.
+            mfchangedfiles.setdefault(n, set()).update(c[3])
             return x
 
         self._verbosenote(_('uncompressed size of bundle content:\n'))
@@ -668,8 +672,12 @@  class cg1packer(object):
             clnode = mfs[x]
             if not fastpathlinkrev:
                 mdata = ml.readfast(x)
-                for f, n in mdata.iteritems():
-                    if f in changedfiles:
+                for f in mfchangedfiles[x]:
+                    if True:
+                        try:
+                            n = mdata[f]
+                        except KeyError:
+                            continue
                         # record the first changeset introducing this filelog
                         # version
                         fclnodes = fnodes.setdefault(f, {})
@@ -696,6 +704,9 @@  class cg1packer(object):
                 return dict(genfilenodes())
             return fnodes.get(fname, {})
 
+        changedfiles = set()
+        for x in mfchangedfiles.itervalues():
+            changedfiles.update(x)
         for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                         source):
             yield chunk