Patchwork [2,of,4] revbranchcache: add a bundle2 handler for a rbc part

login
register
mail settings
Submitter Boris Feld
Date Jan. 18, 2018, 3:38 p.m.
Message ID <9c1ad82226a2e1fc7ca6.1516289925@FB>
Download mbox | patch
Permalink /patch/26905/
State Accepted
Headers show

Comments

Boris Feld - Jan. 18, 2018, 3:38 p.m.
# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 1516282142 -3600
#      Thu Jan 18 14:29:02 2018 +0100
# Node ID 9c1ad82226a2e1fc7ca69550a46a7f7329c6b579
# Parent  4ad1a1054450063cc9aa19ab2037722c64877eb7
# EXP-Topic wire-rbc
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 9c1ad82226a2
revbranchcache: add a bundle2 handler for a rbc part

We add the necessary bit to process a part containing rev-branch-cache
information. The local rev branch cache is then updated with the received
information.

Computing branch cache can become a signifiant part of the time spent pulling so
this is expected to provide worthwhile speedup.
Yuya Nishihara - Jan. 19, 2018, 2:54 p.m.
On Thu, 18 Jan 2018 16:38:45 +0100, Boris Feld wrote:
> # HG changeset patch
> # User Boris Feld <boris.feld@octobus.net>
> # Date 1516282142 -3600
> #      Thu Jan 18 14:29:02 2018 +0100
> # Node ID 9c1ad82226a2e1fc7ca69550a46a7f7329c6b579
> # Parent  4ad1a1054450063cc9aa19ab2037722c64877eb7
> # EXP-Topic wire-rbc
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 9c1ad82226a2
> revbranchcache: add a bundle2 handler for a rbc part

> diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py
> --- a/mercurial/bundle2.py
> +++ b/mercurial/bundle2.py
> @@ -2101,6 +2101,40 @@ def handlehgtagsfnodes(op, inpart):
>      cache.write()
>      op.ui.debug('applied %i hgtags fnodes cache entries\n' % count)
>  
> +rbcstruct = struct.Struct('>III')
> +
> +@parthandler('cache:rev-branch-cache')
> +def handlerbc(op, inpart):
> +    """receive a rev-branch-cache payload and update the local cache
> +
> +    The payload is a series of data related to each branch
> +
> +    1) branch name length
> +    2) number of open heads
> +    3) number of closed heads
> +    4) open heads nodes
> +    5) closed heads nodes
> +    """
> +    total = 0
> +    rawheader = inpart.read(rbcstruct.size)
> +    cache = op.repo.revbranchcache()
> +    cl = op.repo.unfiltered().changelog
> +    while rawheader:
> +        header = rbcstruct.unpack(rawheader)
> +        total += header[1] + header[2]
> +        branch = inpart.read(header[0])

We'll probably need to convert branch name between utf-8 and local encoding.

> +        for x in xrange(header[1]):
> +            node = inpart.read(20)
> +            rev = cl.rev(node)
> +            cache.setdata(branch, rev, node, False)
> +        for x in xrange(header[2]):
> +            node = inpart.read(20)
> +            rev = cl.rev(node)
> +            cache.setdata(branch, rev, node, True)
> +        rawheader = inpart.read(rbcstruct.size)

> +    if total and 'branchinfo' in vars(cache):
> +        del cache.branchinfo

No idea why we have to undo 'self.branchinfo = self._branchinfo'. Can you
add a comment?
Boris Feld - Feb. 20, 2018, 11:02 a.m.
On 19/01/2018 15:54, Yuya Nishihara wrote:
> On Thu, 18 Jan 2018 16:38:45 +0100, Boris Feld wrote:
>> # HG changeset patch
>> # User Boris Feld <boris.feld@octobus.net>
>> # Date 1516282142 -3600
>> #      Thu Jan 18 14:29:02 2018 +0100
>> # Node ID 9c1ad82226a2e1fc7ca69550a46a7f7329c6b579
>> # Parent  4ad1a1054450063cc9aa19ab2037722c64877eb7
>> # EXP-Topic wire-rbc
>> # Available At https://bitbucket.org/octobus/mercurial-devel/
>> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 9c1ad82226a2
>> revbranchcache: add a bundle2 handler for a rbc part
>> diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py
>> --- a/mercurial/bundle2.py
>> +++ b/mercurial/bundle2.py
>> @@ -2101,6 +2101,40 @@ def handlehgtagsfnodes(op, inpart):
>>       cache.write()
>>       op.ui.debug('applied %i hgtags fnodes cache entries\n' % count)
>>   
>> +rbcstruct = struct.Struct('>III')
>> +
>> +@parthandler('cache:rev-branch-cache')
>> +def handlerbc(op, inpart):
>> +    """receive a rev-branch-cache payload and update the local cache
>> +
>> +    The payload is a series of data related to each branch
>> +
>> +    1) branch name length
>> +    2) number of open heads
>> +    3) number of closed heads
>> +    4) open heads nodes
>> +    5) closed heads nodes
>> +    """
>> +    total = 0
>> +    rawheader = inpart.read(rbcstruct.size)
>> +    cache = op.repo.revbranchcache()
>> +    cl = op.repo.unfiltered().changelog
>> +    while rawheader:
>> +        header = rbcstruct.unpack(rawheader)
>> +        total += header[1] + header[2]
>> +        branch = inpart.read(header[0])
> We'll probably need to convert branch name between utf-8 and local encoding.

We are not sure to understand why. The rev branch cache uses an internal 
encoding, so local encoding shouldn't matter in our analysis.

https://www.mercurial-scm.org/repo/hg/file/tip/mercurial/branchmap.py#l336

>
>> +        for x in xrange(header[1]):
>> +            node = inpart.read(20)
>> +            rev = cl.rev(node)
>> +            cache.setdata(branch, rev, node, False)
>> +        for x in xrange(header[2]):
>> +            node = inpart.read(20)
>> +            rev = cl.rev(node)
>> +            cache.setdata(branch, rev, node, True)
>> +        rawheader = inpart.read(rbcstruct.size)
>> +    if total and 'branchinfo' in vars(cache):
>> +        del cache.branchinfo
> No idea why we have to undo 'self.branchinfo = self._branchinfo'. Can you
> add a comment?


If branchmap caught an error when trying to read the branchmap file and 
it's read-only (the default), it set the branchinfo attribute (with 
`self.branchinfo = self._branchinfo`) in order to bypass the caching 
logic defined in the `branchinfo` method.

If we add some information in the cache, we need to undo this bypass by 
removing the instance attribute, so branchinfo will point back to the 
original branchinfo method and cache will be used.

We will add a comment in the V2 version of this series.
Yuya Nishihara - Feb. 20, 2018, 1:03 p.m.
On Tue, 20 Feb 2018 12:02:36 +0100, Feld Boris wrote:
> On 19/01/2018 15:54, Yuya Nishihara wrote:
> > On Thu, 18 Jan 2018 16:38:45 +0100, Boris Feld wrote:
> >> # HG changeset patch
> >> # User Boris Feld <boris.feld@octobus.net>
> >> # Date 1516282142 -3600
> >> #      Thu Jan 18 14:29:02 2018 +0100
> >> # Node ID 9c1ad82226a2e1fc7ca69550a46a7f7329c6b579
> >> # Parent  4ad1a1054450063cc9aa19ab2037722c64877eb7
> >> # EXP-Topic wire-rbc
> >> # Available At https://bitbucket.org/octobus/mercurial-devel/
> >> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 9c1ad82226a2
> >> revbranchcache: add a bundle2 handler for a rbc part
> >> diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py
> >> --- a/mercurial/bundle2.py
> >> +++ b/mercurial/bundle2.py
> >> @@ -2101,6 +2101,40 @@ def handlehgtagsfnodes(op, inpart):
> >>       cache.write()
> >>       op.ui.debug('applied %i hgtags fnodes cache entries\n' % count)
> >>   
> >> +rbcstruct = struct.Struct('>III')
> >> +
> >> +@parthandler('cache:rev-branch-cache')
> >> +def handlerbc(op, inpart):
> >> +    """receive a rev-branch-cache payload and update the local cache
> >> +
> >> +    The payload is a series of data related to each branch
> >> +
> >> +    1) branch name length
> >> +    2) number of open heads
> >> +    3) number of closed heads
> >> +    4) open heads nodes
> >> +    5) closed heads nodes
> >> +    """
> >> +    total = 0
> >> +    rawheader = inpart.read(rbcstruct.size)
> >> +    cache = op.repo.revbranchcache()
> >> +    cl = op.repo.unfiltered().changelog
> >> +    while rawheader:
> >> +        header = rbcstruct.unpack(rawheader)
> >> +        total += header[1] + header[2]
> >> +        branch = inpart.read(header[0])
> > We'll probably need to convert branch name between utf-8 and local encoding.
> 
> We are not sure to understand why. The rev branch cache uses an internal 
> encoding, so local encoding shouldn't matter in our analysis.
> 
> https://www.mercurial-scm.org/repo/hg/file/tip/mercurial/branchmap.py#l336

The storage is in utf-8, but the function interface isn't.

https://www.mercurial-scm.org/repo/hg/file/tip/mercurial/branchmap.py#l358
https://www.mercurial-scm.org/repo/hg/file/tip/mercurial/branchmap.py#l365

> 
> >
> >> +        for x in xrange(header[1]):
> >> +            node = inpart.read(20)
> >> +            rev = cl.rev(node)
> >> +            cache.setdata(branch, rev, node, False)
> >> +        for x in xrange(header[2]):
> >> +            node = inpart.read(20)
> >> +            rev = cl.rev(node)
> >> +            cache.setdata(branch, rev, node, True)
> >> +        rawheader = inpart.read(rbcstruct.size)
> >> +    if total and 'branchinfo' in vars(cache):
> >> +        del cache.branchinfo
> > No idea why we have to undo 'self.branchinfo = self._branchinfo'. Can you
> > add a comment?
> 
> 
> If branchmap caught an error when trying to read the branchmap file and 
> it's read-only (the default), it set the branchinfo attribute (with 
> `self.branchinfo = self._branchinfo`) in order to bypass the caching 
> logic defined in the `branchinfo` method.
> 
> If we add some information in the cache, we need to undo this bypass by 
> removing the instance attribute, so branchinfo will point back to the 
> original branchinfo method and cache will be used.
> 
> We will add a comment in the V2 version of this series.

Thanks. Perhaps that should be handled in revbranchcache.setdata() if readonly
hack has to be dropped when cache data is updated.

Patch

diff --git a/mercurial/bundle2.py b/mercurial/bundle2.py
--- a/mercurial/bundle2.py
+++ b/mercurial/bundle2.py
@@ -2101,6 +2101,40 @@  def handlehgtagsfnodes(op, inpart):
     cache.write()
     op.ui.debug('applied %i hgtags fnodes cache entries\n' % count)
 
+rbcstruct = struct.Struct('>III')
+
+@parthandler('cache:rev-branch-cache')
+def handlerbc(op, inpart):
+    """receive a rev-branch-cache payload and update the local cache
+
+    The payload is a series of data related to each branch
+
+    1) branch name length
+    2) number of open heads
+    3) number of closed heads
+    4) open heads nodes
+    5) closed heads nodes
+    """
+    total = 0
+    rawheader = inpart.read(rbcstruct.size)
+    cache = op.repo.revbranchcache()
+    cl = op.repo.unfiltered().changelog
+    while rawheader:
+        header = rbcstruct.unpack(rawheader)
+        total += header[1] + header[2]
+        branch = inpart.read(header[0])
+        for x in xrange(header[1]):
+            node = inpart.read(20)
+            rev = cl.rev(node)
+            cache.setdata(branch, rev, node, False)
+        for x in xrange(header[2]):
+            node = inpart.read(20)
+            rev = cl.rev(node)
+            cache.setdata(branch, rev, node, True)
+        rawheader = inpart.read(rbcstruct.size)
+    if total and 'branchinfo' in vars(cache):
+        del cache.branchinfo
+
 @parthandler('pushvars')
 def bundle2getvars(op, part):
     '''unbundle a bundle2 containing shellvars on the server'''