Patchwork [04,of,10] branchmap: add the tiprev (cache key) on the branchmap object

login
register
mail settings
Submitter Pierre-Yves David
Date Dec. 22, 2012, 1:48 a.m.
Message ID <090ada0acddb4486e94f.1356140931@yamac.lan>
Download mbox | patch
Permalink /patch/268/
State Superseded, archived
Commit 090ada0acddb4486e94fba9c89e25b0624442d0f
Headers show

Comments

Pierre-Yves David - Dec. 22, 2012, 1:48 a.m.
# HG changeset patch
# User Pierre-Yves David <pierre-yves.david at logilab.fr>
# Date 1356138386 -3600
# Node ID 090ada0acddb4486e94fba9c89e25b0624442d0f
# Parent  ad194a8ab5c15e6d74fe087061ee960f5d6bbf16
branchmap: add the tiprev (cache key) on the branchmap object

The actual cache key used on disk is the (tipnode, tiprev) pair. There is no
reason not to use the revision number for the in memory version.
Augie Fackler - Dec. 24, 2012, 1:21 a.m.
On Dec 21, 2012, at 8:48 PM, Pierre-Yves David <pierre-yves.david at ens-lyon.org> wrote:

> # HG changeset patch
> # User Pierre-Yves David <pierre-yves.david at logilab.fr>
> # Date 1356138386 -3600
> # Node ID 090ada0acddb4486e94fba9c89e25b0624442d0f
> # Parent  ad194a8ab5c15e6d74fe087061ee960f5d6bbf16
> branchmap: add the tiprev (cache key) on the branchmap object
> 
> The actual cache key used on disk is the (tipnode, tiprev) pair. There is no
> reason not to use the revision number for the in memory version.

It feels strange to me to have both tipnode and tiprev on this object, partially because it feels more likely that they'll fall out of sync somehow. Then again, I suppose if that happens then we want the cache to be invalidated anyway?

> 
> diff --git a/mercurial/branchmap.py b/mercurial/branchmap.py
> --- a/mercurial/branchmap.py
> +++ b/mercurial/branchmap.py
> @@ -13,11 +13,11 @@ def read(repo):
>     try:
>         f = repo.opener("cache/branchheads")
>         lines = f.read().split('\n')
>         f.close()
>     except (IOError, OSError):
> -        return branchcache(), nullrev
> +        return branchcache()
> 
>     try:
>         last, lrev = lines.pop(0).split(" ", 1)
>         last, lrev = bin(last), int(lrev)
>         if lrev >= len(repo) or repo[lrev].node() != last:
> @@ -31,17 +31,18 @@ def read(repo):
>             if not node in repo:
>                 raise ValueError('invalidating branch cache because node '+
>                                  '%s does not exist' % node)
>             partial.setdefault(label, []).append(bin(node))
>         partial.tipnode = last
> +        partial.tiprev = lrev
>     except KeyboardInterrupt:
>         raise
>     except Exception, inst:
>         if repo.ui.debugflag:
>             repo.ui.warn(str(inst), '\n')
> -        partial, lrev = branchcache(), nullrev
> -    return partial, lrev
> +        partial = branchcache()
> +    return partial
> 
> def write(repo, branches, tip, tiprev):
>     try:
>         f = repo.opener("cache/branchheads", "w", atomictemp=True)
>         f.write("%s %s\n" % (hex(tip), tiprev))
> @@ -119,35 +120,35 @@ def updatecache(repo):
>     partial = repo._branchcache
>     if partial is not None and partial.tipnode == tip:
>         return
> 
>     if partial is None or partial.tipnode not in cl.nodemap:
> -        partial, lrev = read(repo)
> -    else:
> -        lrev = cl.rev(partial.tipnode)
> +        partial = read(repo)
> 
>     catip = repo._cacheabletip()
> -    # if lrev == catip: cache is already up to date
> -    # if lrev >  catip: we have uncachable element in `partial` can't write
> -    #                   on disk
> -    if lrev < catip:
> -        ctxgen = (repo[r] for r in cl.revs(lrev + 1, catip))
> +    # if partial.tiprev == catip: cache is already up to date
> +    # if partial.tiprev >  catip: we have uncachable element in `partial` can't
> +    #                             write on disk
> +    if partial.tiprev < catip:
> +        ctxgen = (repo[r] for r in cl.revs(partial.tiprev + 1, catip))
>         update(repo, partial, ctxgen)
>         partial.tipnode = cl.node(catip)
> -        write(repo, partial, partial.tipnode, catip)
> -        lrev = catip
> +        partial.tiprev = catip
> +        write(repo, partial, partial.tipnode, partial.tiprev)
>     # If cacheable tip were lower than actual tip, we need to update the
>     # cache up to tip. This update (from cacheable to actual tip) is not
>     # written to disk since it's not cacheable.
>     tiprev = len(repo) - 1
> -    if lrev < tiprev:
> -        ctxgen = (repo[r] for r in cl.revs(lrev + 1, tiprev))
> +    if partial.tiprev < tiprev:
> +        ctxgen = (repo[r] for r in cl.revs(partial.tiprev + 1, tiprev))
>         update(repo, partial, ctxgen)
>         partial.tipnode = cl.node(tiprev)
> +        partial.tiprev = tiprev
>     repo._branchcache = partial
> 
> class branchcache(dict):
>     """A dict like object that hold branches heads cache"""
> 
> -    def __init__(self, entries=(), tipnode=nullid):
> +    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev):
>         super(branchcache, self).__init__(entries)
>         self.tipnode = tipnode
> +        self.tiprev = tiprev
> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> --- a/mercurial/localrepo.py
> +++ b/mercurial/localrepo.py
> @@ -1432,17 +1432,17 @@ class localrepository(object):
>         '''
>         # If we have info, newheadnodes, on how to update the branch cache, do
>         # it, Otherwise, since nodes were destroyed, the cache is stale and this
>         # will be caught the next time it is read.
>         if newheadnodes:
> -            tiprev = len(self) - 1
>             ctxgen = (self[node] for node in newheadnodes
>                       if self.changelog.hasnode(node))
> -            branchmap.update(self, self._branchcache, ctxgen)
> -            self._branchcache.tipnode = self.changelog.tip()
> -            branchmap.write(self, self._branchcache, self._branchcache.tipnode,
> -                            tiprev)
> +            cache = self._branchcache
> +            branchmap.update(self, cache, ctxgen)
> +            cache.tipnode = self.changelog.tip()
> +            cache.tiprev = self.changelog.rev(cache.tipnode)
> +            branchmap.write(self, cache, cache.tipnode, cache.tiprev)
> 
>         # Ensure the persistent tag cache is updated.  Doing it now
>         # means that the tag cache only has to worry about destroyed
>         # heads immediately after a strip/rollback.  That in turn
>         # guarantees that "cachetip == currenttip" (comparing both rev
> @@ -2493,13 +2493,14 @@ class localrepository(object):
> 
>                 if rbheads:
>                     rtiprev = max((int(self.changelog.rev(node))
>                             for node in rbheads))
>                     cache = branchmap.branchcache(rbranchmap,
> -                                                  self[rtiprev].node())
> +                                                  self[rtiprev].node(),
> +                                                  rtiprev)
>                     self._branchcache = cache
> -                    branchmap.write(self, cache, cache.tipnode, rtiprev)
> +                    branchmap.write(self, cache, cache.tipnode, cache.tiprev)
>             self.invalidate()
>             return len(self.heads()) + 1
>         finally:
>             lock.release()
>
Pierre-Yves David - Dec. 24, 2012, 1:32 a.m.
On 24 d?c. 2012, at 02:21, Augie Fackler wrote:

> 
> On Dec 21, 2012, at 8:48 PM, Pierre-Yves David <pierre-yves.david at ens-lyon.org> wrote:
> 
>> # HG changeset patch
>> # User Pierre-Yves David <pierre-yves.david at logilab.fr>
>> # Date 1356138386 -3600
>> # Node ID 090ada0acddb4486e94fba9c89e25b0624442d0f
>> # Parent  ad194a8ab5c15e6d74fe087061ee960f5d6bbf16
>> branchmap: add the tiprev (cache key) on the branchmap object
>> 
>> The actual cache key used on disk is the (tipnode, tiprev) pair. There is no
>> reason not to use the revision number for the in memory version.
> 
> It feels strange to me to have both tipnode and tiprev on this object, partially because it feels more likely that they'll fall out of sync somehow. Then again, I suppose if that happens then we want the cache to be invalidated anyway?

They are both part of the key so we better have both of them here.

Patch 8 removes any all updates mades outside the class lowering the chance for them to go out of sync by mistake

Patch 10 makes a heavy use of both to check validity of a cache again a repo state. In particular, we read and write both on disk.
Augie Fackler - Dec. 24, 2012, 1:33 a.m.
On Dec 23, 2012, at 8:32 PM, Pierre-Yves David <pierre-yves.david at ens-lyon.org> wrote:

> 
> On 24 d?c. 2012, at 02:21, Augie Fackler wrote:
> 
>> 
>> On Dec 21, 2012, at 8:48 PM, Pierre-Yves David <pierre-yves.david at ens-lyon.org> wrote:
>> 
>>> # HG changeset patch
>>> # User Pierre-Yves David <pierre-yves.david at logilab.fr>
>>> # Date 1356138386 -3600
>>> # Node ID 090ada0acddb4486e94fba9c89e25b0624442d0f
>>> # Parent  ad194a8ab5c15e6d74fe087061ee960f5d6bbf16
>>> branchmap: add the tiprev (cache key) on the branchmap object
>>> 
>>> The actual cache key used on disk is the (tipnode, tiprev) pair. There is no
>>> reason not to use the revision number for the in memory version.
>> 
>> It feels strange to me to have both tipnode and tiprev on this object, partially because it feels more likely that they'll fall out of sync somehow. Then again, I suppose if that happens then we want the cache to be invalidated anyway?
> 
> They are both part of the key so we better have both of them here.
> 
> Patch 8 removes any all updates mades outside the class lowering the chance for them to go out of sync by mistake
> 
> Patch 10 makes a heavy use of both to check validity of a cache again a repo state. In particular, we read and write both on disk.

Is there any reason code outside of the branchmap class should ever be peeking at these attributes? Could I persuade you to make them private in a followup?

> 
> -- 
> Pierre-Yves David
>
Pierre-Yves David - Dec. 24, 2012, 1:42 a.m.
On 24 d?c. 2012, at 02:33, Augie Fackler wrote:

> On Dec 23, 2012, at 8:32 PM, Pierre-Yves David <pierre-yves.david at ens-lyon.org> wrote:
>> On 24 d?c. 2012, at 02:21, Augie Fackler wrote:
>>> It feels strange to me to have both tipnode and tiprev on this object, partially because it feels more likely that they'll fall out of sync somehow. Then again, I suppose if that happens then we want the cache to be invalidated anyway?
>> 
>> They are both part of the key so we better have both of them here.
>> 
>> Patch 8 removes any all updates mades outside the class lowering the chance for them to go out of sync by mistake
>> 
>> Patch 10 makes a heavy use of both to check validity of a cache again a repo state. In particular, we read and write both on disk.
> 
> Is there any reason code outside of the branchmap class should ever be peeking at these attributes?

The updatecache function in branchmap still do.

> Could I persuade you to make them private in a followup?

We could probably clean up the updatecache function and make them private but:

(1) That won't happen before I'm done torturing the branchmap module for filtering purpose
(2) Your low patch pushing karma is harming your persuasion skill ;-)
Augie Fackler - Dec. 24, 2012, 1:45 a.m.
On Dec 23, 2012, at 8:42 PM, Pierre-Yves David <pierre-yves.david at ens-lyon.org> wrote:

> 
> On 24 d?c. 2012, at 02:33, Augie Fackler wrote:
> 
>> On Dec 23, 2012, at 8:32 PM, Pierre-Yves David <pierre-yves.david at ens-lyon.org> wrote:
>>> On 24 d?c. 2012, at 02:21, Augie Fackler wrote:
>>>> It feels strange to me to have both tipnode and tiprev on this object, partially because it feels more likely that they'll fall out of sync somehow. Then again, I suppose if that happens then we want the cache to be invalidated anyway?
>>> 
>>> They are both part of the key so we better have both of them here.
>>> 
>>> Patch 8 removes any all updates mades outside the class lowering the chance for them to go out of sync by mistake
>>> 
>>> Patch 10 makes a heavy use of both to check validity of a cache again a repo state. In particular, we read and write both on disk.
>> 
>> Is there any reason code outside of the branchmap class should ever be peeking at these attributes?
> 
> The updatecache function in branchmap still do.
> 
>> Could I persuade you to make them private in a followup?
> 
> We could probably clean up the updatecache function and make them private but:
> 
> (1) That won't happen before I'm done torturing the branchmap module for filtering purpose

Fair enough. Can you ping me when you've applied the English edits to this series and I'll pull it? It looks fine to me.

> (2) Your low patch pushing karma is harming your persuasion skill ;-)
> 
> -- 
> Pierre-Yves

Patch

diff --git a/mercurial/branchmap.py b/mercurial/branchmap.py
--- a/mercurial/branchmap.py
+++ b/mercurial/branchmap.py
@@ -13,11 +13,11 @@  def read(repo):
     try:
         f = repo.opener("cache/branchheads")
         lines = f.read().split('\n')
         f.close()
     except (IOError, OSError):
-        return branchcache(), nullrev
+        return branchcache()
 
     try:
         last, lrev = lines.pop(0).split(" ", 1)
         last, lrev = bin(last), int(lrev)
         if lrev >= len(repo) or repo[lrev].node() != last:
@@ -31,17 +31,18 @@  def read(repo):
             if not node in repo:
                 raise ValueError('invalidating branch cache because node '+
                                  '%s does not exist' % node)
             partial.setdefault(label, []).append(bin(node))
         partial.tipnode = last
+        partial.tiprev = lrev
     except KeyboardInterrupt:
         raise
     except Exception, inst:
         if repo.ui.debugflag:
             repo.ui.warn(str(inst), '\n')
-        partial, lrev = branchcache(), nullrev
-    return partial, lrev
+        partial = branchcache()
+    return partial
 
 def write(repo, branches, tip, tiprev):
     try:
         f = repo.opener("cache/branchheads", "w", atomictemp=True)
         f.write("%s %s\n" % (hex(tip), tiprev))
@@ -119,35 +120,35 @@  def updatecache(repo):
     partial = repo._branchcache
     if partial is not None and partial.tipnode == tip:
         return
 
     if partial is None or partial.tipnode not in cl.nodemap:
-        partial, lrev = read(repo)
-    else:
-        lrev = cl.rev(partial.tipnode)
+        partial = read(repo)
 
     catip = repo._cacheabletip()
-    # if lrev == catip: cache is already up to date
-    # if lrev >  catip: we have uncachable element in `partial` can't write
-    #                   on disk
-    if lrev < catip:
-        ctxgen = (repo[r] for r in cl.revs(lrev + 1, catip))
+    # if partial.tiprev == catip: cache is already up to date
+    # if partial.tiprev >  catip: we have uncachable element in `partial` can't
+    #                             write on disk
+    if partial.tiprev < catip:
+        ctxgen = (repo[r] for r in cl.revs(partial.tiprev + 1, catip))
         update(repo, partial, ctxgen)
         partial.tipnode = cl.node(catip)
-        write(repo, partial, partial.tipnode, catip)
-        lrev = catip
+        partial.tiprev = catip
+        write(repo, partial, partial.tipnode, partial.tiprev)
     # If cacheable tip were lower than actual tip, we need to update the
     # cache up to tip. This update (from cacheable to actual tip) is not
     # written to disk since it's not cacheable.
     tiprev = len(repo) - 1
-    if lrev < tiprev:
-        ctxgen = (repo[r] for r in cl.revs(lrev + 1, tiprev))
+    if partial.tiprev < tiprev:
+        ctxgen = (repo[r] for r in cl.revs(partial.tiprev + 1, tiprev))
         update(repo, partial, ctxgen)
         partial.tipnode = cl.node(tiprev)
+        partial.tiprev = tiprev
     repo._branchcache = partial
 
 class branchcache(dict):
     """A dict like object that hold branches heads cache"""
 
-    def __init__(self, entries=(), tipnode=nullid):
+    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev):
         super(branchcache, self).__init__(entries)
         self.tipnode = tipnode
+        self.tiprev = tiprev
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1432,17 +1432,17 @@  class localrepository(object):
         '''
         # If we have info, newheadnodes, on how to update the branch cache, do
         # it, Otherwise, since nodes were destroyed, the cache is stale and this
         # will be caught the next time it is read.
         if newheadnodes:
-            tiprev = len(self) - 1
             ctxgen = (self[node] for node in newheadnodes
                       if self.changelog.hasnode(node))
-            branchmap.update(self, self._branchcache, ctxgen)
-            self._branchcache.tipnode = self.changelog.tip()
-            branchmap.write(self, self._branchcache, self._branchcache.tipnode,
-                            tiprev)
+            cache = self._branchcache
+            branchmap.update(self, cache, ctxgen)
+            cache.tipnode = self.changelog.tip()
+            cache.tiprev = self.changelog.rev(cache.tipnode)
+            branchmap.write(self, cache, cache.tipnode, cache.tiprev)
 
         # Ensure the persistent tag cache is updated.  Doing it now
         # means that the tag cache only has to worry about destroyed
         # heads immediately after a strip/rollback.  That in turn
         # guarantees that "cachetip == currenttip" (comparing both rev
@@ -2493,13 +2493,14 @@  class localrepository(object):
 
                 if rbheads:
                     rtiprev = max((int(self.changelog.rev(node))
                             for node in rbheads))
                     cache = branchmap.branchcache(rbranchmap,
-                                                  self[rtiprev].node())
+                                                  self[rtiprev].node(),
+                                                  rtiprev)
                     self._branchcache = cache
-                    branchmap.write(self, cache, cache.tipnode, rtiprev)
+                    branchmap.write(self, cache, cache.tipnode, cache.tiprev)
             self.invalidate()
             return len(self.heads()) + 1
         finally:
             lock.release()