Patchwork [1,of,6,RFC] manifest: introduce an accessor class for manifests

login
register
mail settings
Submitter Durham Goode
Date Nov. 3, 2016, 10:27 p.m.
Message ID <1788ee9e1df92ac94b9b.1478212057@dev111.prn1.facebook.com>
Download mbox | patch
Permalink /patch/17329/
State Changes Requested
Headers show

Comments

Durham Goode - Nov. 3, 2016, 10:27 p.m.
# HG changeset patch
# User Durham Goode <durham@fb.com>
# Date 1478208817 25200
#      Thu Nov 03 14:33:37 2016 -0700
# Branch stable
# Node ID 1788ee9e1df92ac94b9be84eac6d16e3bad903a9
# Parent  b9f7b0c10027764cee77f9c6d61877fcffea837f
manifest: introduce an accessor class for manifests

This introduces a revlogaccessor class which can be used to allow multiple
objects hold an auto-invalidating reference to a revlog, without having to hold
a reference to the actual repo object. Future patches will switch repo.manifest
and repo.manifestlog to access the manifest through this accessor. This will fix
the circular reference caused by manifestlog and manifestctx holding a reference
to the repo
Durham Goode - Nov. 3, 2016, 10:33 p.m.
This series is RFC because it conflicts with my V5 manifest series that 
is out.

These patches remove the circular dependency between manifestlog and 
localrepo by adding a manifestaccessor instance which provides 
invalidating access to the manifest revlog, so multiple objects can 
reference the revlog data without holding on to the repo itself.  I sent 
this out now to get feedback since I think people may have opinions on 
this approach.


On 11/3/16 3:27 PM, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham@fb.com>
> # Date 1478208817 25200
> #      Thu Nov 03 14:33:37 2016 -0700
> # Branch stable
> # Node ID 1788ee9e1df92ac94b9be84eac6d16e3bad903a9
> # Parent  b9f7b0c10027764cee77f9c6d61877fcffea837f
> manifest: introduce an accessor class for manifests
>
> This introduces a revlogaccessor class which can be used to allow multiple
> objects hold an auto-invalidating reference to a revlog, without having to hold
> a reference to the actual repo object. Future patches will switch repo.manifest
> and repo.manifestlog to access the manifest through this accessor. This will fix
> the circular reference caused by manifestlog and manifestctx holding a reference
> to the repo
>
> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> --- a/mercurial/localrepo.py
> +++ b/mercurial/localrepo.py
> @@ -514,6 +514,11 @@ class localrepository(object):
>           # manifest creation.
>           return manifest.manifest(self.svfs)
>   
> +    @unfilteredpropertycache
> +    def manifestaccessor(self):
> +        return revlogaccessor('00manifest.i', self.svfs,
> +                              self._constructmanifest)
> +
>       @storecache('00manifest.i')
>       def manifestlog(self):
>           return manifest.manifestlog(self.svfs, self)
> @@ -1275,6 +1280,8 @@ class localrepository(object):
>                   delattr(unfiltered, k)
>               except AttributeError:
>                   pass
> +        self.manifestaccessor.clear(clearfilecache)
> +
>           self.invalidatecaches()
>           if not self.currenttransaction():
>               # TODO: Changing contents of store outside transaction
> @@ -1296,6 +1303,7 @@ class localrepository(object):
>               if k == 'dirstate' or k not in self.__dict__:
>                   continue
>               ce.refresh()
> +        self.manifestaccessor.refresh()
>   
>       def _lock(self, vfs, lockname, wait, releasefn, acquirefn, desc,
>                 inheritchecker=None, parentenvvar=None):
> @@ -2004,3 +2012,40 @@ def newreporequirements(repo):
>           requirements.add('manifestv2')
>   
>       return requirements
> +
> +def revlogaccessor(filename, opener, constructor):
> +    """Creates an accessor that provides cached and invalidated access to a
> +    revlog, via instance.revlog. This is useful for letting multiple objects
> +    hold a reference to the revlog, without having to hold a possibly-circular
> +    reference to the actual repository.  """
> +
> +    # We have to use a runtime type here, because the only way to create a
> +    # property is to put it on a class itself, and the property is dynamically
> +    # defined by the filename parameter.
> +    class accessor(object):
> +        def __init__(self):
> +            self._filecache = {}
> +
> +        @scmutil.filecache(filename)
> +        def revlog(self):
> +            return constructor()
> +
> +        def join(self, name):
> +            return opener.join(name)
> +
> +        def clear(self, clearfilecache):
> +            for k in self._filecache.keys():
> +                if clearfilecache:
> +                    del self._filecache[k]
> +                try:
> +                    delattr(self, k)
> +                except AttributeError:
> +                    pass
> +
> +        def refresh(self):
> +            for k, ce in self._filecache.items():
> +                if k not in self.__dict__:
> +                    continue
> +                ce.refresh()
> +
> +    return accessor()
> diff --git a/mercurial/manifest.py b/mercurial/manifest.py
> --- a/mercurial/manifest.py
> +++ b/mercurial/manifest.py
> @@ -1254,7 +1254,7 @@ class manifestlog(object):
>               usetreemanifest = opts.get('treemanifest', usetreemanifest)
>           self._treeinmem = usetreemanifest
>   
> -        self._oldmanifest = repo._constructmanifest()
> +        self._oldmanifest = repo.manifestaccessor.revlog
>           self._revlog = self._oldmanifest
>   
>           # We'll separate this into it's own cache once oldmanifest is no longer
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://urldefense.proofpoint.com/v2/url?u=https-3A__www.mercurial-2Dscm.org_mailman_listinfo_mercurial-2Ddevel&d=DQIGaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=nuarHzhP1wi1T9iURRCj1A&m=FaX7fFZjM89DjuYNmCnhnV7ZNbcHdxiC0tnJmRFQSnY&s=IFdLTTntCXcFrGeZCVRg9ucxucsdcssbsl8Om89v5rY&e=
Yuya Nishihara - Nov. 5, 2016, 1:05 p.m.
On Thu, 3 Nov 2016 15:27:37 -0700, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham@fb.com>
> # Date 1478208817 25200
> #      Thu Nov 03 14:33:37 2016 -0700
> # Branch stable
> # Node ID 1788ee9e1df92ac94b9be84eac6d16e3bad903a9
> # Parent  b9f7b0c10027764cee77f9c6d61877fcffea837f
> manifest: introduce an accessor class for manifests
> 
> This introduces a revlogaccessor class which can be used to allow multiple
> objects hold an auto-invalidating reference to a revlog, without having to hold
> a reference to the actual repo object. Future patches will switch repo.manifest
> and repo.manifestlog to access the manifest through this accessor. This will fix
> the circular reference caused by manifestlog and manifestctx holding a reference
> to the repo
> 
> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> --- a/mercurial/localrepo.py
> +++ b/mercurial/localrepo.py
> @@ -514,6 +514,11 @@ class localrepository(object):
>          # manifest creation.
>          return manifest.manifest(self.svfs)
>  
> +    @unfilteredpropertycache
> +    def manifestaccessor(self):
> +        return revlogaccessor('00manifest.i', self.svfs,
> +                              self._constructmanifest)

Honestly I don't get why manifestlog and manifestctxs have to live longer
than the other repo properties. But suppose that is necessary, I agree we'll
need this kind of a wrapper. Maybe we can move the accessor to the manifestlog,
but still the accessor will have to be shared (and updated transparently) by
the manifestlog and its cachable manifestctxs.

> +def revlogaccessor(filename, opener, constructor):
> +    """Creates an accessor that provides cached and invalidated access to a
> +    revlog, via instance.revlog. This is useful for letting multiple objects
> +    hold a reference to the revlog, without having to hold a possibly-circular
> +    reference to the actual repository.  """
> +
> +    # We have to use a runtime type here, because the only way to create a
> +    # property is to put it on a class itself, and the property is dynamically
> +    # defined by the filename parameter.
> +    class accessor(object):

Perhaps we could refactor filecache to avoid dynamically creating a class, but
that would be a minor issue of this RFC series.
Pierre-Yves David - Nov. 7, 2016, 12:41 p.m.
On 11/05/2016 02:05 PM, Yuya Nishihara wrote:
> On Thu, 3 Nov 2016 15:27:37 -0700, Durham Goode wrote:
>> # HG changeset patch
>> # User Durham Goode <durham@fb.com>
>> # Date 1478208817 25200
>> #      Thu Nov 03 14:33:37 2016 -0700
>> # Branch stable
>> # Node ID 1788ee9e1df92ac94b9be84eac6d16e3bad903a9
>> # Parent  b9f7b0c10027764cee77f9c6d61877fcffea837f
>> manifest: introduce an accessor class for manifests
>>
>> This introduces a revlogaccessor class which can be used to allow multiple
>> objects hold an auto-invalidating reference to a revlog, without having to hold
>> a reference to the actual repo object. Future patches will switch repo.manifest
>> and repo.manifestlog to access the manifest through this accessor. This will fix
>> the circular reference caused by manifestlog and manifestctx holding a reference
>> to the repo
>>
>> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
>> --- a/mercurial/localrepo.py
>> +++ b/mercurial/localrepo.py
>> @@ -514,6 +514,11 @@ class localrepository(object):
>>          # manifest creation.
>>          return manifest.manifest(self.svfs)
>>
>> +    @unfilteredpropertycache
>> +    def manifestaccessor(self):
>> +        return revlogaccessor('00manifest.i', self.svfs,
>> +                              self._constructmanifest)
>
> Honestly I don't get why manifestlog and manifestctxs have to live longer
> than the other repo properties.

I'm also a bit curious about that.

> But suppose that is necessary, I agree we'll
> need this kind of a wrapper.

Any reason why we are using the wrapper approach over using a weak 
reference ? Weakref are not great but we use them in multiple spot when 
needed. this seems it would be simpler than the current approach, but I 
might be missing something.

> Maybe we can move the accessor to the manifestlog,
> but still the accessor will have to be shared (and updated transparently) by
> the manifestlog and its cachable manifestctxs.
>
>> +def revlogaccessor(filename, opener, constructor):
>> +    """Creates an accessor that provides cached and invalidated access to a
>> +    revlog, via instance.revlog. This is useful for letting multiple objects
>> +    hold a reference to the revlog, without having to hold a possibly-circular
>> +    reference to the actual repository.  """
>> +
>> +    # We have to use a runtime type here, because the only way to create a
>> +    # property is to put it on a class itself, and the property is dynamically
>> +    # defined by the filename parameter.
>> +    class accessor(object):
>
> Perhaps we could refactor filecache to avoid dynamically creating a class, but
> that would be a minor issue of this RFC series.
Durham Goode - Nov. 8, 2016, 3:53 p.m.
On 11/7/16 12:41 PM, Pierre-Yves David wrote:

> On 11/05/2016 02:05 PM, Yuya Nishihara wrote:
>> On Thu, 3 Nov 2016 15:27:37 -0700, Durham Goode wrote:
>>> # HG changeset patch
>>> # User Durham Goode <durham@fb.com>
>>> # Date 1478208817 25200
>>> #      Thu Nov 03 14:33:37 2016 -0700
>>> # Branch stable
>>> # Node ID 1788ee9e1df92ac94b9be84eac6d16e3bad903a9
>>> # Parent  b9f7b0c10027764cee77f9c6d61877fcffea837f
>>> manifest: introduce an accessor class for manifests
>>>
>>> This introduces a revlogaccessor class which can be used to allow 
>>> multiple
>>> objects hold an auto-invalidating reference to a revlog, without 
>>> having to hold
>>> a reference to the actual repo object. Future patches will switch 
>>> repo.manifest
>>> and repo.manifestlog to access the manifest through this accessor. 
>>> This will fix
>>> the circular reference caused by manifestlog and manifestctx holding 
>>> a reference
>>> to the repo
>>>
>>> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
>>> --- a/mercurial/localrepo.py
>>> +++ b/mercurial/localrepo.py
>>> @@ -514,6 +514,11 @@ class localrepository(object):
>>>          # manifest creation.
>>>          return manifest.manifest(self.svfs)
>>>
>>> +    @unfilteredpropertycache
>>> +    def manifestaccessor(self):
>>> +        return revlogaccessor('00manifest.i', self.svfs,
>>> +                              self._constructmanifest)
>>
>> Honestly I don't get why manifestlog and manifestctxs have to live 
>> longer
>> than the other repo properties.
>
> I'm also a bit curious about that.
 From a user expectation perspective, I'm trying to offer the same 
experience that changectx currently promises.  i.e. a user can hold on 
to the changectx and access it later, regardless of what locks or 
transactions happened in the interim.  Arguably changectx does a poor 
job of this (since you have to access some data first in order for it to 
be cached), but from a caller's perspective it usually "just works".  So 
the lifetime of a changectx is already longer than the actual repo or 
the specific changelog revlog instance.  So this change just makes 
manifestctx appear to have that same lifetime.
>
>> But suppose that is necessary, I agree we'll
>> need this kind of a wrapper.
>
> Any reason why we are using the wrapper approach over using a weak 
> reference ? Weakref are not great but we use them in multiple spot 
> when needed. this seems it would be simpler than the current approach, 
> but I might be missing something.
A weak reference to the localrepo instance?  I'd be fine with that, but 
I figured the community wouldn't like that since it it means the code is 
still circular (even if the memory references aren't).  If both of you 
are ok with the weak reference, I can send that as a series instead.
>
>> Maybe we can move the accessor to the manifestlog,
>> but still the accessor will have to be shared (and updated 
>> transparently) by
>> the manifestlog and its cachable manifestctxs.
>>
>>> +def revlogaccessor(filename, opener, constructor):
>>> +    """Creates an accessor that provides cached and invalidated 
>>> access to a
>>> +    revlog, via instance.revlog. This is useful for letting 
>>> multiple objects
>>> +    hold a reference to the revlog, without having to hold a 
>>> possibly-circular
>>> +    reference to the actual repository.  """
>>> +
>>> +    # We have to use a runtime type here, because the only way to 
>>> create a
>>> +    # property is to put it on a class itself, and the property is 
>>> dynamically
>>> +    # defined by the filename parameter.
>>> +    class accessor(object):
>>
>> Perhaps we could refactor filecache to avoid dynamically creating a 
>> class, but
>> that would be a minor issue of this RFC series.
Yuya Nishihara - Nov. 9, 2016, 1:53 p.m.
On Tue, 8 Nov 2016 15:53:39 +0000, Durham Goode wrote:
> On 11/7/16 12:41 PM, Pierre-Yves David wrote:
> > On 11/05/2016 02:05 PM, Yuya Nishihara wrote:
> >> On Thu, 3 Nov 2016 15:27:37 -0700, Durham Goode wrote:
> >>> # HG changeset patch
> >>> # User Durham Goode <durham@fb.com>
> >>> # Date 1478208817 25200
> >>> #      Thu Nov 03 14:33:37 2016 -0700
> >>> # Branch stable
> >>> # Node ID 1788ee9e1df92ac94b9be84eac6d16e3bad903a9
> >>> # Parent  b9f7b0c10027764cee77f9c6d61877fcffea837f
> >>> manifest: introduce an accessor class for manifests
> >>>
> >>> This introduces a revlogaccessor class which can be used to allow 
> >>> multiple
> >>> objects hold an auto-invalidating reference to a revlog, without 
> >>> having to hold
> >>> a reference to the actual repo object. Future patches will switch 
> >>> repo.manifest
> >>> and repo.manifestlog to access the manifest through this accessor. 
> >>> This will fix
> >>> the circular reference caused by manifestlog and manifestctx holding 
> >>> a reference
> >>> to the repo
> >>>
> >>> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> >>> --- a/mercurial/localrepo.py
> >>> +++ b/mercurial/localrepo.py
> >>> @@ -514,6 +514,11 @@ class localrepository(object):
> >>>          # manifest creation.
> >>>          return manifest.manifest(self.svfs)
> >>>
> >>> +    @unfilteredpropertycache
> >>> +    def manifestaccessor(self):
> >>> +        return revlogaccessor('00manifest.i', self.svfs,
> >>> +                              self._constructmanifest)
> >>
> >> Honestly I don't get why manifestlog and manifestctxs have to live 
> >> longer
> >> than the other repo properties.
> >
> > I'm also a bit curious about that.
>  From a user expectation perspective, I'm trying to offer the same 
> experience that changectx currently promises.  i.e. a user can hold on 
> to the changectx and access it later, regardless of what locks or 
> transactions happened in the interim.  Arguably changectx does a poor 
> job of this (since you have to access some data first in order for it to 
> be cached), but from a caller's perspective it usually "just works".  So 
> the lifetime of a changectx is already longer than the actual repo or 
> the specific changelog revlog instance.  So this change just makes 
> manifestctx appear to have that same lifetime.

As I said before, I don't see changectx (and workingctx) guarantee that
because some properties are cached.

  ctx = repo[1]
  ctx.parents()
  # run 'hg strip 1'
  repo.invalidate()
  ctx.parents()  # cached

IMHO, the lifetime of ctx should end at repo.invalidate().

> >> But suppose that is necessary, I agree we'll
> >> need this kind of a wrapper.
> >
> > Any reason why we are using the wrapper approach over using a weak 
> > reference ? Weakref are not great but we use them in multiple spot 
> > when needed. this seems it would be simpler than the current approach, 
> > but I might be missing something.
> A weak reference to the localrepo instance?  I'd be fine with that, but 
> I figured the community wouldn't like that since it it means the code is 
> still circular (even if the memory references aren't).  If both of you 
> are ok with the weak reference, I can send that as a series instead.

I don't have strong opinion about weakref. It smells, but localrepo already
has many weakrefs.
Pierre-Yves David - Nov. 10, 2016, 2:53 p.m.
On 11/08/2016 03:53 PM, Durham Goode wrote:
> On 11/7/16 12:41 PM, Pierre-Yves David wrote:
>
>> On 11/05/2016 02:05 PM, Yuya Nishihara wrote:
>>> On Thu, 3 Nov 2016 15:27:37 -0700, Durham Goode wrote:
>>>> # HG changeset patch
>>>> # User Durham Goode <durham@fb.com>
>>>> # Date 1478208817 25200
>>>> #      Thu Nov 03 14:33:37 2016 -0700
>>>> # Branch stable
>>>> # Node ID 1788ee9e1df92ac94b9be84eac6d16e3bad903a9
>>>> # Parent  b9f7b0c10027764cee77f9c6d61877fcffea837f
>>>> manifest: introduce an accessor class for manifests
>>>>
>>>> This introduces a revlogaccessor class which can be used to allow
>>>> multiple
>>>> objects hold an auto-invalidating reference to a revlog, without
>>>> having to hold
>>>> a reference to the actual repo object. Future patches will switch
>>>> repo.manifest
>>>> and repo.manifestlog to access the manifest through this accessor.
>>>> This will fix
>>>> the circular reference caused by manifestlog and manifestctx holding
>>>> a reference
>>>> to the repo
>>>>
>>>> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
>>>> --- a/mercurial/localrepo.py
>>>> +++ b/mercurial/localrepo.py
>>>> @@ -514,6 +514,11 @@ class localrepository(object):
>>>>          # manifest creation.
>>>>          return manifest.manifest(self.svfs)
>>>>
>>>> +    @unfilteredpropertycache
>>>> +    def manifestaccessor(self):
>>>> +        return revlogaccessor('00manifest.i', self.svfs,
>>>> +                              self._constructmanifest)
>>>
>>> Honestly I don't get why manifestlog and manifestctxs have to live
>>> longer
>>> than the other repo properties.
>>
>> I'm also a bit curious about that.
> From a user expectation perspective, I'm trying to offer the same
> experience that changectx currently promises.  i.e. a user can hold on
> to the changectx and access it later, regardless of what locks or
> transactions happened in the interim.  Arguably changectx does a poor
> job of this (since you have to access some data first in order for it to
> be cached), but from a caller's perspective it usually "just works".  So
> the lifetime of a changectx is already longer than the actual repo or
> the specific changelog revlog instance.  So this change just makes
> manifestctx appear to have that same lifetime.

I'm not sure how this is actually True for the changectx themself, This 
might work for the most part by accident but I'm unsure this is an 
explicit goal and (non-)API garantee. Do we actually explicitly rely on 
that anywhere in the codebase?

In all cases, I see changectx significantly higher level than manifestct 
in the API. Many part of the code needs a "rich" object to represent a 
change but very few place actually want to look and manipulate the 
manifest directly. I'm still in the dark regarding you actual plan here, 
why do you need to augmented the capability of the manifestctx object?

(note, "user" here is a bit confusing because I don't expect user to be 
ever aware of any code internal, we are talking about caller or 
extension writer here, right?)

>>> But suppose that is necessary, I agree we'll
>>> need this kind of a wrapper.
>>
>> Any reason why we are using the wrapper approach over using a weak
>> reference ? Weakref are not great but we use them in multiple spot
>> when needed. this seems it would be simpler than the current approach,
>> but I might be missing something.
> A weak reference to the localrepo instance?  I'd be fine with that, but
> I figured the community wouldn't like that since it it means the code is
> still circular (even if the memory references aren't).  If both of you
> are ok with the weak reference, I can send that as a series instead.
>>
>>> Maybe we can move the accessor to the manifestlog,
>>> but still the accessor will have to be shared (and updated
>>> transparently) by
>>> the manifestlog and its cachable manifestctxs.
>>>
>>>> +def revlogaccessor(filename, opener, constructor):
>>>> +    """Creates an accessor that provides cached and invalidated
>>>> access to a
>>>> +    revlog, via instance.revlog. This is useful for letting
>>>> multiple objects
>>>> +    hold a reference to the revlog, without having to hold a
>>>> possibly-circular
>>>> +    reference to the actual repository.  """
>>>> +
>>>> +    # We have to use a runtime type here, because the only way to
>>>> create a
>>>> +    # property is to put it on a class itself, and the property is
>>>> dynamically
>>>> +    # defined by the filename parameter.
>>>> +    class accessor(object):
>>>
>>> Perhaps we could refactor filecache to avoid dynamically creating a
>>> class, but
>>> that would be a minor issue of this RFC series.
Durham Goode - Nov. 11, 2016, 9:24 a.m.
On 11/10/16 2:53 PM, Pierre-Yves David wrote:
>
>
> On 11/08/2016 03:53 PM, Durham Goode wrote:
>> On 11/7/16 12:41 PM, Pierre-Yves David wrote:
>>
>>> On 11/05/2016 02:05 PM, Yuya Nishihara wrote:
>>>> On Thu, 3 Nov 2016 15:27:37 -0700, Durham Goode wrote:
>>>>> # HG changeset patch
>>>>> # User Durham Goode <durham@fb.com>
>>>>> # Date 1478208817 25200
>>>>> #      Thu Nov 03 14:33:37 2016 -0700
>>>>> # Branch stable
>>>>> # Node ID 1788ee9e1df92ac94b9be84eac6d16e3bad903a9
>>>>> # Parent  b9f7b0c10027764cee77f9c6d61877fcffea837f
>>>>> manifest: introduce an accessor class for manifests
>>>>>
>>>>> This introduces a revlogaccessor class which can be used to allow
>>>>> multiple
>>>>> objects hold an auto-invalidating reference to a revlog, without
>>>>> having to hold
>>>>> a reference to the actual repo object. Future patches will switch
>>>>> repo.manifest
>>>>> and repo.manifestlog to access the manifest through this accessor.
>>>>> This will fix
>>>>> the circular reference caused by manifestlog and manifestctx holding
>>>>> a reference
>>>>> to the repo
>>>>>
>>>>> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
>>>>> --- a/mercurial/localrepo.py
>>>>> +++ b/mercurial/localrepo.py
>>>>> @@ -514,6 +514,11 @@ class localrepository(object):
>>>>>          # manifest creation.
>>>>>          return manifest.manifest(self.svfs)
>>>>>
>>>>> +    @unfilteredpropertycache
>>>>> +    def manifestaccessor(self):
>>>>> +        return revlogaccessor('00manifest.i', self.svfs,
>>>>> +                              self._constructmanifest)
>>>>
>>>> Honestly I don't get why manifestlog and manifestctxs have to live
>>>> longer
>>>> than the other repo properties.
>>>
>>> I'm also a bit curious about that.
>> From a user expectation perspective, I'm trying to offer the same
>> experience that changectx currently promises.  i.e. a user can hold on
>> to the changectx and access it later, regardless of what locks or
>> transactions happened in the interim.  Arguably changectx does a poor
>> job of this (since you have to access some data first in order for it to
>> be cached), but from a caller's perspective it usually "just works".  So
>> the lifetime of a changectx is already longer than the actual repo or
>> the specific changelog revlog instance.  So this change just makes
>> manifestctx appear to have that same lifetime.
>
> I'm not sure how this is actually True for the changectx themself, 
> This might work for the most part by accident but I'm unsure this is 
> an explicit goal and (non-)API garantee. Do we actually explicitly 
> rely on that anywhere in the codebase?
>
> In all cases, I see changectx significantly higher level than 
> manifestct in the API. Many part of the code needs a "rich" object to 
> represent a change but very few place actually want to look and 
> manipulate the manifest directly. I'm still in the dark regarding you 
> actual plan here, why do you need to augmented the capability of the 
> manifestctx object?
>
> (note, "user" here is a bit confusing because I don't expect user to 
> be ever aware of any code internal, we are talking about caller or 
> extension writer here, right?)
Alright, I'll just send a patch that makes it pass around the 
manifestrevlog instead of the repo to the manifestlog and manifestctx. I 
think it may lead to subtle bugs in the future (like when attempting to 
access an old manifestctx), but it's probably not a big deal for now.
Jun Wu - Nov. 11, 2016, 11:56 p.m.
Excerpts from Pierre-Yves David's message of 2016-11-10 14:53:51 +0000:
> 
> On 11/08/2016 03:53 PM, Durham Goode wrote:
> > On 11/7/16 12:41 PM, Pierre-Yves David wrote:
> >
> >> On 11/05/2016 02:05 PM, Yuya Nishihara wrote:
> >>> On Thu, 3 Nov 2016 15:27:37 -0700, Durham Goode wrote:
> >>>> # HG changeset patch
> >>>> # User Durham Goode <durham@fb.com>
> >>>> # Date 1478208817 25200
> >>>> #      Thu Nov 03 14:33:37 2016 -0700
> >>>> # Branch stable
> >>>> # Node ID 1788ee9e1df92ac94b9be84eac6d16e3bad903a9
> >>>> # Parent  b9f7b0c10027764cee77f9c6d61877fcffea837f
> >>>> manifest: introduce an accessor class for manifests
> >>>>
> >>>> This introduces a revlogaccessor class which can be used to allow
> >>>> multiple
> >>>> objects hold an auto-invalidating reference to a revlog, without
> >>>> having to hold
> >>>> a reference to the actual repo object. Future patches will switch
> >>>> repo.manifest
> >>>> and repo.manifestlog to access the manifest through this accessor.
> >>>> This will fix
> >>>> the circular reference caused by manifestlog and manifestctx holding
> >>>> a reference
> >>>> to the repo
> >>>>
> >>>> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> >>>> --- a/mercurial/localrepo.py
> >>>> +++ b/mercurial/localrepo.py
> >>>> @@ -514,6 +514,11 @@ class localrepository(object):
> >>>>          # manifest creation.
> >>>>          return manifest.manifest(self.svfs)
> >>>>
> >>>> +    @unfilteredpropertycache
> >>>> +    def manifestaccessor(self):
> >>>> +        return revlogaccessor('00manifest.i', self.svfs,
> >>>> +                              self._constructmanifest)
> >>>
> >>> Honestly I don't get why manifestlog and manifestctxs have to live
> >>> longer
> >>> than the other repo properties.
> >>
> >> I'm also a bit curious about that.
> > From a user expectation perspective, I'm trying to offer the same
> > experience that changectx currently promises.  i.e. a user can hold on
> > to the changectx and access it later, regardless of what locks or
> > transactions happened in the interim.  Arguably changectx does a poor
> > job of this (since you have to access some data first in order for it to
> > be cached), but from a caller's perspective it usually "just works".  So
> > the lifetime of a changectx is already longer than the actual repo or
> > the specific changelog revlog instance.  So this change just makes
> > manifestctx appear to have that same lifetime.
> 
> I'm not sure how this is actually True for the changectx themself, This 
> might work for the most part by accident but I'm unsure this is an 
> explicit goal and (non-)API garantee. Do we actually explicitly rely on 
> that anywhere in the codebase?

I'd like note that keeping changectx objects across strip is definitely a
disaster. It's also problematic if the changectx got stripped for an aborted
transaction.

In general, I don't think keeping changesets across repo objects is a good
idea. Although I could be convinced by good use-cases.

Patch

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -514,6 +514,11 @@  class localrepository(object):
         # manifest creation.
         return manifest.manifest(self.svfs)
 
+    @unfilteredpropertycache
+    def manifestaccessor(self):
+        return revlogaccessor('00manifest.i', self.svfs,
+                              self._constructmanifest)
+
     @storecache('00manifest.i')
     def manifestlog(self):
         return manifest.manifestlog(self.svfs, self)
@@ -1275,6 +1280,8 @@  class localrepository(object):
                 delattr(unfiltered, k)
             except AttributeError:
                 pass
+        self.manifestaccessor.clear(clearfilecache)
+
         self.invalidatecaches()
         if not self.currenttransaction():
             # TODO: Changing contents of store outside transaction
@@ -1296,6 +1303,7 @@  class localrepository(object):
             if k == 'dirstate' or k not in self.__dict__:
                 continue
             ce.refresh()
+        self.manifestaccessor.refresh()
 
     def _lock(self, vfs, lockname, wait, releasefn, acquirefn, desc,
               inheritchecker=None, parentenvvar=None):
@@ -2004,3 +2012,40 @@  def newreporequirements(repo):
         requirements.add('manifestv2')
 
     return requirements
+
+def revlogaccessor(filename, opener, constructor):
+    """Creates an accessor that provides cached and invalidated access to a
+    revlog, via instance.revlog. This is useful for letting multiple objects
+    hold a reference to the revlog, without having to hold a possibly-circular
+    reference to the actual repository.  """
+
+    # We have to use a runtime type here, because the only way to create a
+    # property is to put it on a class itself, and the property is dynamically
+    # defined by the filename parameter.
+    class accessor(object):
+        def __init__(self):
+            self._filecache = {}
+
+        @scmutil.filecache(filename)
+        def revlog(self):
+            return constructor()
+
+        def join(self, name):
+            return opener.join(name)
+
+        def clear(self, clearfilecache):
+            for k in self._filecache.keys():
+                if clearfilecache:
+                    del self._filecache[k]
+                try:
+                    delattr(self, k)
+                except AttributeError:
+                    pass
+
+        def refresh(self):
+            for k, ce in self._filecache.items():
+                if k not in self.__dict__:
+                    continue
+                ce.refresh()
+
+    return accessor()
diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -1254,7 +1254,7 @@  class manifestlog(object):
             usetreemanifest = opts.get('treemanifest', usetreemanifest)
         self._treeinmem = usetreemanifest
 
-        self._oldmanifest = repo._constructmanifest()
+        self._oldmanifest = repo.manifestaccessor.revlog
         self._revlog = self._oldmanifest
 
         # We'll separate this into it's own cache once oldmanifest is no longer