Patchwork [1,of,5] manifest: break mancache into two caches

login
register
mail settings
Submitter Durham Goode
Date Aug. 9, 2016, 1:17 a.m.
Message ID <f91cdd4315bbc92ad893.1470705430@dev8486.prn1.facebook.com>
Download mbox | patch
Permalink /patch/16225/
State Superseded
Headers show

Comments

Durham Goode - Aug. 9, 2016, 1:17 a.m.
# HG changeset patch
# User Durham Goode <durham@fb.com>
# Date 1470696646 25200
#      Mon Aug 08 15:50:46 2016 -0700
# Node ID f91cdd4315bbc92ad893c8084c0347c218399ce3
# Parent  37b6f0ec6241a62de90737409458cd622e2fac0d
manifest: break mancache into two caches

The old manifest cache cached both the in-memory representation and the raw
text. As part of the manifest refactor we want to separate the storage format
from the in-memory representation, so let's split this cache into two caches.

This will let other manifest implementations participate in the in-memory cache,
while allowing the revlog-based implementations to still depend on the full-text
caching where necessary.
Durham Goode - Aug. 9, 2016, 1:28 a.m.
On 8/8/16 6:17 PM, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham@fb.com>
> # Date 1470696646 25200
> #      Mon Aug 08 15:50:46 2016 -0700
> # Node ID f91cdd4315bbc92ad893c8084c0347c218399ce3
> # Parent  37b6f0ec6241a62de90737409458cd622e2fac0d
> manifest: break mancache into two caches
>
This is the beginning of a 30-patch series that refactors the current
manifest class. It splits the concept of a manifest collection from the
concept of a manifest instance and from the concept of a particular
storage (like revlogs).  The result looks very much like our changelog
today (changelog is a collection of commits, changectx is an individual
commit), except that we've also separated out the storage.

The overall refactor adds the new classes incrementally, then begins
moving functionality out of manifest onto the new classes, changing
call sites as we go.  At the very end the current manifest class will be
deleted.

The final code can be inspected here:
https://bitbucket.org/DurhamG/hg/src/73fb2514d89400e290684e635a4f592017b4ad08/mercurial/manifest.py?at=manifestrefactor&fileviewer=file-view-default#manifest.py-954

(as can the entire series)

I'll be getting perf numbers before we get far enough into the series 
for it to be important.

The high level design ends up looking like so:

class manifestlog(object):
     """A collection class representing the collection of manifest snapshots
     referenced by commits in the repository.

     In this situation, 'manifest' refers to the abstract concept of a snapshot
     of the list of files in the given commit. Consumers of the output of this
     class do not care about the implementation details of the actual manifests
     they receive (e.g. tree, flat, lazily loaded, etc.)."""

     def __init__(self, opener, revlog)
     def __getitem__(self, node): return self.get(node)
     def get(self, node, dir='')
     def add(self, m, transaction, link, p1, p2, ...): return m.write(transaction, ...)

class manifestrevlog(revlog):
     """A revlog that stores manifest texts. This is responsible for caching the
     full-text manifest contents.
     """

     def dirlog(self, dir)

class manifestctx(manifestdict):
     """A class representing a single revision of a manifest, including its
     contents, its parent revs, and its linkrev."""

     def new(self)
     def node(self)
     def p1(self)
     def p2(self)
     def linkrev(self)
     def readfast(self, shallow=False)
     def readdelta(self, shallow=False)

class treemanifestctx(treemanifest):
     """Same as manifestctx, but is backed by tree storage instead of flat
     storage."""

     <same as manifestctx>

class memmanifestctx(manifestdict):
     """In-memory representation of a pending manifestctx. Has a write function
     that will serialize the pending manifest to storage."""

     def new(self)
     def write(self, transaction, link, p1, p2, ....)

class memtreemanifestctx(treemanifest):
     """Same as memmanifestctx, except it is also aware of recursively
     serializing trees to storage."""

     <same as memmanifestctx>
     def _addtree(...)

Patch

diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py
--- a/mercurial/bundlerepo.py
+++ b/mercurial/bundlerepo.py
@@ -205,7 +205,7 @@  class bundlemanifest(bundlerevlog, manif
             node = self.node(node)
 
         if node in self._mancache:
-            result = self._mancache[node][0].text()
+            result = self._mancache[node].text()
         else:
             result = manifest.manifest.revision(self, nodeorrev)
         return result
diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -908,6 +908,7 @@  class manifest(revlog.revlog):
             usetreemanifest = opts.get('treemanifest', usetreemanifest)
             usemanifestv2 = opts.get('manifestv2', usemanifestv2)
         self._mancache = util.lrucachedict(cachesize)
+        self._fulltextcache = util.lrucachedict(cachesize)
         self._treeinmem = usetreemanifest
         self._treeondisk = usetreemanifest
         self._usemanifestv2 = usemanifestv2
@@ -1000,7 +1001,7 @@  class manifest(revlog.revlog):
         if node == revlog.nullid:
             return self._newmanifest() # don't upset local cache
         if node in self._mancache:
-            return self._mancache[node][0]
+            return self._mancache[node]
         if self._treeondisk:
             def gettext():
                 return self.revision(node)
@@ -1014,7 +1015,8 @@  class manifest(revlog.revlog):
             text = self.revision(node)
             m = self._newmanifest(text)
             arraytext = array.array('c', text)
-        self._mancache[node] = (m, arraytext)
+        self._mancache[node] = m
+        self._fulltextcache[node] = arraytext
         return m
 
     def readshallow(self, node):
@@ -1034,7 +1036,7 @@  class manifest(revlog.revlog):
             return None, None
 
     def add(self, m, transaction, link, p1, p2, added, removed):
-        if (p1 in self._mancache and not self._treeinmem
+        if (p1 in self._fulltextcache and not self._treeinmem
             and not self._usemanifestv2):
             # If our first parent is in the manifest cache, we can
             # compute a delta here using properties we know about the
@@ -1046,7 +1048,7 @@  class manifest(revlog.revlog):
             work = heapq.merge([(x, False) for x in added],
                                [(x, True) for x in removed])
 
-            arraytext, deltatext = m.fastdelta(self._mancache[p1][1], work)
+            arraytext, deltatext = m.fastdelta(self._fulltextcache[p1], work)
             cachedelta = self.rev(p1), deltatext
             text = util.buffer(arraytext)
             n = self.addrevision(text, transaction, link, p1, p2, cachedelta)
@@ -1065,7 +1067,8 @@  class manifest(revlog.revlog):
                 n = self.addrevision(text, transaction, link, p1, p2)
                 arraytext = array.array('c', text)
 
-        self._mancache[n] = (m, arraytext)
+        self._mancache[n] = m
+        self._fulltextcache[n] = arraytext
 
         return n
 
@@ -1092,5 +1095,6 @@  class manifest(revlog.revlog):
 
     def clearcaches(self):
         super(manifest, self).clearcaches()
+        self._fulltextcache.clear()
         self._mancache.clear()
         self._dirlogcache = {'': self}