Submitter | Sean Farley |
---|---|
Date | Aug. 19, 2014, 2:42 a.m. |
Message ID | <594d00e49cea6cd2ebb9.1408408958@1.0.0.127.in-addr.arpa> |
Download | mbox | patch |
Permalink | /patch/5492/ |
State | Changes Requested |
Headers | show |
Comments
On 08/18/2014 07:42 PM, Sean Farley wrote: > # HG changeset patch > # User Sean Farley <sean.michael.farley@gmail.com> > # Date 1408408678 18000 > # Mon Aug 18 19:37:58 2014 -0500 > # Node ID 594d00e49cea6cd2ebb92c0570e7502f8797e232 > # Parent 8dda6f6ff564d8fe6ac7b8ce4c74eb9bfb5de14a > basectx: cache filectx generation for all contexts > > Previously, filecontexts are generated on the fly but not cached. Should be "were" > We need to keep them around if we're going to allow the data inside > to be changed. So the motivation for this is the ability to store content when we forge a changeset in memory. You should make it more explicite > This change is added to basectx so that other contexts that inherit can > benefit as well. I'm pretty sure we do not want filectx to be cache in the general case. - There is people working with insanely huge working copy. - There is tool keeping changectx around (like probably hgview) assuming its a cheap object - The usual assumption is that Mercurial requires about O(<largest file size>) memory. You are making this O(<largest changesets content>) I recommend taking the easiest road first. Implement caching only for "Content overwritten in a memctx". Other people will be eager to optimize anything as long as you have something working that can get some users. Also, the filectx object keep a reference to the changectx. So you cannot hold a reference from the changectx to the filectx without introducing a cycle > Currently, this is disabled for memctx objects that are passed a callable data > store function. > > diff --git a/mercurial/context.py b/mercurial/context.py > --- a/mercurial/context.py > +++ b/mercurial/context.py > @@ -31,10 +31,11 @@ class basectx(object): > o = super(basectx, cls).__new__(cls) > > o._repo = repo > o._rev = nullrev > o._node = nullid This would need some documentation about what is contains and when it is disabled. > + o._filectxs = {} > > return o > > def __str__(self): > return short(self.node()) > @@ -56,11 +57,20 @@ class basectx(object): > > def __contains__(self, key): > return key in self._manifest > > def __getitem__(self, key): > - return self.filectx(key) > + try: > + return self._filectxs[key] > + except TypeError: > + # filectx caching is not enabled > + return self.filectx(key) > + except KeyError: > + v = self.filectx(key) > + self._filectxs[key] = v > + return v > + From what I read below, self._filectxs is either a dict of None. So you should: 1) use "self._filectxs is None" instead of catching a type error 2) use "self._filectxs.get(…)" instead of KeyError > > def __iter__(self): > for f in sorted(self._manifest): > yield f > > @@ -1614,10 +1624,14 @@ class memctx(committablectx): > copied = copied[0] > return memfilectx(repo, path, fctx.data(), > islink=fctx.islink(), isexec=fctx.isexec(), > copied=copied, memctx=memctx) > self._filectxfn = getfilectx > + else: > + # disable filectx caching for users that send in custom function, > + # e.g. hg convert > + self._filectxs = None > > self._extra = extra and extra.copy() or {} > if self._extra.get('branch', '') == '': > self._extra['branch'] = 'default' > > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel >
Pierre-Yves David writes: > On 08/18/2014 07:42 PM, Sean Farley wrote: >> # HG changeset patch >> # User Sean Farley <sean.michael.farley@gmail.com> >> # Date 1408408678 18000 >> # Mon Aug 18 19:37:58 2014 -0500 >> # Node ID 594d00e49cea6cd2ebb92c0570e7502f8797e232 >> # Parent 8dda6f6ff564d8fe6ac7b8ce4c74eb9bfb5de14a >> basectx: cache filectx generation for all contexts >> >> Previously, filecontexts are generated on the fly but not cached. > > Should be "were" Well, I'll be ... grammar corrections from Pierre-Yves. >> We need to keep them around if we're going to allow the data inside >> to be changed. > > So the motivation for this is the ability to store content when we forge > a changeset in memory. You should make it more explicite I was originally going to disagree with you but I have now changed my mind. I agree that this should just be done in memctx. >> This change is added to basectx so that other contexts that inherit can >> benefit as well. > > I'm pretty sure we do not want filectx to be cache in the general case. > > - There is people working with insanely huge working copy. > > - There is tool keeping changectx around (like probably hgview) assuming > its a cheap object > > - The usual assumption is that Mercurial requires about O(<largest file > size>) memory. You are making this O(<largest changesets content>) > > I recommend taking the easiest road first. Implement caching only for > "Content overwritten in a memctx". Other people will be eager to > optimize anything as long as you have something working that can get > some users. I agree. I'll change it. > Also, the filectx object keep a reference to the changectx. So you > cannot hold a reference from the changectx to the filectx without > introducing a cycle Would this be a real problem now? Or are you saying that there's another way? >> Currently, this is disabled for memctx objects that are passed a callable data >> store function. >> >> diff --git a/mercurial/context.py b/mercurial/context.py >> --- a/mercurial/context.py >> +++ b/mercurial/context.py >> @@ -31,10 +31,11 @@ class basectx(object): >> o = super(basectx, cls).__new__(cls) >> >> o._repo = repo >> o._rev = nullrev >> o._node = nullid > > This would need some documentation about what is contains and when it is > disabled. Sure. >> + o._filectxs = {} >> >> return o >> >> def __str__(self): >> return short(self.node()) >> @@ -56,11 +57,20 @@ class basectx(object): >> >> def __contains__(self, key): >> return key in self._manifest >> >> def __getitem__(self, key): >> - return self.filectx(key) >> + try: >> + return self._filectxs[key] >> + except TypeError: >> + # filectx caching is not enabled >> + return self.filectx(key) >> + except KeyError: >> + v = self.filectx(key) >> + self._filectxs[key] = v >> + return v >> + > > From what I read below, self._filectxs is either a dict of None. So you > should: > > 1) use "self._filectxs is None" instead of catching a type error > 2) use "self._filectxs.get(…)" instead of KeyError Oops, yeah, too much hacking on my part.
Patch
diff --git a/mercurial/context.py b/mercurial/context.py --- a/mercurial/context.py +++ b/mercurial/context.py @@ -31,10 +31,11 @@ class basectx(object): o = super(basectx, cls).__new__(cls) o._repo = repo o._rev = nullrev o._node = nullid + o._filectxs = {} return o def __str__(self): return short(self.node()) @@ -56,11 +57,20 @@ class basectx(object): def __contains__(self, key): return key in self._manifest def __getitem__(self, key): - return self.filectx(key) + try: + return self._filectxs[key] + except TypeError: + # filectx caching is not enabled + return self.filectx(key) + except KeyError: + v = self.filectx(key) + self._filectxs[key] = v + return v + def __iter__(self): for f in sorted(self._manifest): yield f @@ -1614,10 +1624,14 @@ class memctx(committablectx): copied = copied[0] return memfilectx(repo, path, fctx.data(), islink=fctx.islink(), isexec=fctx.isexec(), copied=copied, memctx=memctx) self._filectxfn = getfilectx + else: + # disable filectx caching for users that send in custom function, + # e.g. hg convert + self._filectxs = None self._extra = extra and extra.copy() or {} if self._extra.get('branch', '') == '': self._extra['branch'] = 'default'