Patchwork [1,of,4] manifest: add treemanifestctx class

login
register
mail settings
Submitter Durham Goode
Date Aug. 30, 2016, 11:40 p.m.
Message ID <a6f968072eb72212bf8e.1472600441@dev8486.prn1.facebook.com>
Download mbox | patch
Permalink /patch/16497/
State Accepted
Headers show

Comments

Durham Goode - Aug. 30, 2016, 11:40 p.m.
# HG changeset patch
# User Durham Goode <durham@fb.com>
# Date 1472518307 25200
#      Mon Aug 29 17:51:47 2016 -0700
# Node ID a6f968072eb72212bf8e16fb74114654aca98330
# Parent  318e2b600b80e4ed3c6f37df46ec7544f60d4c0b
manifest: add treemanifestctx class

Before we start using repo.manifestlog in the rest of the code base, we need to
make sure that it's capable of returning treemanifests. As we add new
functionality to manifestctx, we'll add it to treemanifestctx at the same time.
via Mercurial-devel - Aug. 31, 2016, 5:33 p.m.
On Tue, Aug 30, 2016 at 4:40 PM, Durham Goode <durham@fb.com> wrote:
> # HG changeset patch
> # User Durham Goode <durham@fb.com>
> # Date 1472518307 25200
> #      Mon Aug 29 17:51:47 2016 -0700
> # Node ID a6f968072eb72212bf8e16fb74114654aca98330
> # Parent  318e2b600b80e4ed3c6f37df46ec7544f60d4c0b
> manifest: add treemanifestctx class
>
> Before we start using repo.manifestlog in the rest of the code base, we need to
> make sure that it's capable of returning treemanifests. As we add new
> functionality to manifestctx, we'll add it to treemanifestctx at the same time.
>
> diff --git a/mercurial/manifest.py b/mercurial/manifest.py
> --- a/mercurial/manifest.py
> +++ b/mercurial/manifest.py
> @@ -944,21 +944,24 @@ class manifestlog(object):
>          """Retrieves the manifest instance for the given node. Throws a KeyError
>          if not found.
>          """
> -        if (self._oldmanifest._treeondisk
> -            or self._oldmanifest._treeinmem):
> -            # TODO: come back and support tree manifests directly
> -            return self._oldmanifest.read(node)
> +        if node == revlog.nullid:
> +            if self._oldmanifest._treeinmem:
> +                return treemanifest()
> +            else:
> +                return manifestdict()

At this point of the stack, his method (manifestlog.__getitem__()) can
return either a manifest (treemanifest/manifestdict) or a context
(treemanifestctx/manifestctx). Please fix :-) Perhaps just make "
self._mancache[node] = m" conditional (on node != nullid)? Unless
nullid is requested very frequently, I suspect the extra "if node in
self._mancache" (which will be False) won't be an issue.

>
> -        if node == revlog.nullid:
> -            return manifestdict()
>          if node in self._mancache:
>              cachemf = self._mancache[node]
>              # The old manifest may put non-ctx manifests in the cache, so skip
>              # those since they don't implement the full api.
> -            if isinstance(cachemf, manifestctx):
> +            if (isinstance(cachemf, manifestctx) or
> +                isinstance(cachemf, treemanifestctx)):

Maybe this suggests that a super class for manifestctx and
treemanifestctx would make sense? Maybe manifestctx, flatmanifestctx
and treemanifestctx? If that makes sense, it could of course be added
after this part of the series (or as a fifth patch in the series).

>                  return cachemf
>
> -        m = manifestctx(self._revlog, node)
> +        if self._oldmanifest._treeinmem:
> +            m = treemanifestctx(self._revlog, node, dir='')
> +        else:
> +            m = manifestctx(self._revlog, node)
>          self._mancache[node] = m
>          return m
>
> @@ -984,6 +987,35 @@ class manifestctx(manifestdict):
>      def node(self):
>          return self._node
>
> +class treemanifestctx(treemanifest):

manifestctx also has a node() method, so you should probably add that
to treemanifestctx for consistency.

> +    def __init__(self, revlog, node, dir=''):

You always pass a value for dir, so make it not optional? And I'd like
to see it before the node argument, since it defines the scope for the
node (it's the node for a directory, not the directory for a node).

> +        revlog = revlog.dirlog(dir)
> +        self._revlog = revlog
> +        self._dir = dir
> +
> +        self._node = node
> +
> +        # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
> +        # we can instantiate treemanifestctx objects for directories we don't
> +        # have on disk.
> +        #self.p1, self.p2 = revlog.parents(node)
> +        #rev = revlog.rev(node)
> +        #self.linkrev = revlog.linkrev(rev)

Do you mind commenting out the ones in manifestctx too for now? I'm
worried we otherwise might break treemanifests when we start using
p1/p2/rev/linkrev and not notice because it's not covered as well by
tests.

> +
> +        if revlog._treeondisk:
> +            super(treemanifestctx, self).__init__(dir=dir)
> +            def gettext():
> +                return revlog.revision(node)
> +            def readsubtree(dir, subm):
> +                return revlog.dirlog(dir).read(subm)
> +            self.read(gettext, readsubtree)
> +            self.setnode(node)
> +        else:
> +            data = revlog.revision(node)
> +            arraytext = array.array('c', data)
> +            revlog.fulltextcache[node] = arraytext
> +            super(treemanifestctx, self).__init__(dir=dir, text=data)
> +
>  class manifest(manifestrevlog):
>      def __init__(self, opener, dir='', dirlogcache=None):
>          '''The 'dir' and 'dirlogcache' arguments are for internal use by
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Durham Goode - Aug. 31, 2016, 7:43 p.m.
On 8/31/16 10:33 AM, Martin von Zweigbergk wrote:
> On Tue, Aug 30, 2016 at 4:40 PM, Durham Goode <durham@fb.com> wrote:
>> # HG changeset patch
>> # User Durham Goode <durham@fb.com>
>> # Date 1472518307 25200
>> #      Mon Aug 29 17:51:47 2016 -0700
>> # Node ID a6f968072eb72212bf8e16fb74114654aca98330
>> # Parent  318e2b600b80e4ed3c6f37df46ec7544f60d4c0b
>> manifest: add treemanifestctx class
>>
>> Before we start using repo.manifestlog in the rest of the code base, we need to
>> make sure that it's capable of returning treemanifests. As we add new
>> functionality to manifestctx, we'll add it to treemanifestctx at the same time.
>>
>> diff --git a/mercurial/manifest.py b/mercurial/manifest.py
>> --- a/mercurial/manifest.py
>> +++ b/mercurial/manifest.py
>> @@ -944,21 +944,24 @@ class manifestlog(object):
>>           """Retrieves the manifest instance for the given node. Throws a KeyError
>>           if not found.
>>           """
>> -        if (self._oldmanifest._treeondisk
>> -            or self._oldmanifest._treeinmem):
>> -            # TODO: come back and support tree manifests directly
>> -            return self._oldmanifest.read(node)
>> +        if node == revlog.nullid:
>> +            if self._oldmanifest._treeinmem:
>> +                return treemanifest()
>> +            else:
>> +                return manifestdict()
> At this point of the stack, his method (manifestlog.__getitem__()) can
> return either a manifest (treemanifest/manifestdict) or a context
> (treemanifestctx/manifestctx). Please fix :-) Perhaps just make "
> self._mancache[node] = m" conditional (on node != nullid)? Unless
> nullid is requested very frequently, I suspect the extra "if node in
> self._mancache" (which will be False) won't be an issue.
Will do
>
>> -        if node == revlog.nullid:
>> -            return manifestdict()
>>           if node in self._mancache:
>>               cachemf = self._mancache[node]
>>               # The old manifest may put non-ctx manifests in the cache, so skip
>>               # those since they don't implement the full api.
>> -            if isinstance(cachemf, manifestctx):
>> +            if (isinstance(cachemf, manifestctx) or
>> +                isinstance(cachemf, treemanifestctx)):
> Maybe this suggests that a super class for manifestctx and
> treemanifestctx would make sense? Maybe manifestctx, flatmanifestctx
> and treemanifestctx? If that makes sense, it could of course be added
> after this part of the series (or as a fifth patch in the series).
Perhaps.  But this code will be disappearing once the mancache is only 
populated with ctx's.
>
>>                   return cachemf
>>
>> -        m = manifestctx(self._revlog, node)
>> +        if self._oldmanifest._treeinmem:
>> +            m = treemanifestctx(self._revlog, node, dir='')
>> +        else:
>> +            m = manifestctx(self._revlog, node)
>>           self._mancache[node] = m
>>           return m
>>
>> @@ -984,6 +987,35 @@ class manifestctx(manifestdict):
>>       def node(self):
>>           return self._node
>>
>> +class treemanifestctx(treemanifest):
> manifestctx also has a node() method, so you should probably add that
> to treemanifestctx for consistency.
Will do
>
>> +    def __init__(self, revlog, node, dir=''):
> You always pass a value for dir, so make it not optional? And I'd like
> to see it before the node argument, since it defines the scope for the
> node (it's the node for a directory, not the directory for a node).
Will do
>
>> +        revlog = revlog.dirlog(dir)
>> +        self._revlog = revlog
>> +        self._dir = dir
>> +
>> +        self._node = node
>> +
>> +        # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
>> +        # we can instantiate treemanifestctx objects for directories we don't
>> +        # have on disk.
>> +        #self.p1, self.p2 = revlog.parents(node)
>> +        #rev = revlog.rev(node)
>> +        #self.linkrev = revlog.linkrev(rev)
> Do you mind commenting out the ones in manifestctx too for now? I'm
> worried we otherwise might break treemanifests when we start using
> p1/p2/rev/linkrev and not notice because it's not covered as well by
> tests.
Will do
>
>> +
>> +        if revlog._treeondisk:
>> +            super(treemanifestctx, self).__init__(dir=dir)
>> +            def gettext():
>> +                return revlog.revision(node)
>> +            def readsubtree(dir, subm):
>> +                return revlog.dirlog(dir).read(subm)
>> +            self.read(gettext, readsubtree)
>> +            self.setnode(node)
>> +        else:
>> +            data = revlog.revision(node)
>> +            arraytext = array.array('c', data)
>> +            revlog.fulltextcache[node] = arraytext
>> +            super(treemanifestctx, self).__init__(dir=dir, text=data)
>> +
>>   class manifest(manifestrevlog):
>>       def __init__(self, opener, dir='', dirlogcache=None):
>>           '''The 'dir' and 'dirlogcache' arguments are for internal use by
>> _______________________________________________
>> Mercurial-devel mailing list
>> Mercurial-devel@mercurial-scm.org
>> https://urldefense.proofpoint.com/v2/url?u=https-3A__www.mercurial-2Dscm.org_mailman_listinfo_mercurial-2Ddevel&d=DQIBaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=nuarHzhP1wi1T9iURRCj1A&m=7SFuBPXwDS16G42wgq9qhYwEXsl7NwAi_Bgfq1u3ekQ&s=xUWhiuVGCJz58KtvVIygw7XmK2Nhq8LSCVhK72rZjHE&e=

Patch

diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -944,21 +944,24 @@  class manifestlog(object):
         """Retrieves the manifest instance for the given node. Throws a KeyError
         if not found.
         """
-        if (self._oldmanifest._treeondisk
-            or self._oldmanifest._treeinmem):
-            # TODO: come back and support tree manifests directly
-            return self._oldmanifest.read(node)
+        if node == revlog.nullid:
+            if self._oldmanifest._treeinmem:
+                return treemanifest()
+            else:
+                return manifestdict()
 
-        if node == revlog.nullid:
-            return manifestdict()
         if node in self._mancache:
             cachemf = self._mancache[node]
             # The old manifest may put non-ctx manifests in the cache, so skip
             # those since they don't implement the full api.
-            if isinstance(cachemf, manifestctx):
+            if (isinstance(cachemf, manifestctx) or
+                isinstance(cachemf, treemanifestctx)):
                 return cachemf
 
-        m = manifestctx(self._revlog, node)
+        if self._oldmanifest._treeinmem:
+            m = treemanifestctx(self._revlog, node, dir='')
+        else:
+            m = manifestctx(self._revlog, node)
         self._mancache[node] = m
         return m
 
@@ -984,6 +987,35 @@  class manifestctx(manifestdict):
     def node(self):
         return self._node
 
+class treemanifestctx(treemanifest):
+    def __init__(self, revlog, node, dir=''):
+        revlog = revlog.dirlog(dir)
+        self._revlog = revlog
+        self._dir = dir
+
+        self._node = node
+
+        # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
+        # we can instantiate treemanifestctx objects for directories we don't
+        # have on disk.
+        #self.p1, self.p2 = revlog.parents(node)
+        #rev = revlog.rev(node)
+        #self.linkrev = revlog.linkrev(rev)
+
+        if revlog._treeondisk:
+            super(treemanifestctx, self).__init__(dir=dir)
+            def gettext():
+                return revlog.revision(node)
+            def readsubtree(dir, subm):
+                return revlog.dirlog(dir).read(subm)
+            self.read(gettext, readsubtree)
+            self.setnode(node)
+        else:
+            data = revlog.revision(node)
+            arraytext = array.array('c', data)
+            revlog.fulltextcache[node] = arraytext
+            super(treemanifestctx, self).__init__(dir=dir, text=data)
+
 class manifest(manifestrevlog):
     def __init__(self, opener, dir='', dirlogcache=None):
         '''The 'dir' and 'dirlogcache' arguments are for internal use by