Submitter | Durham Goode |
---|---|
Date | March 3, 2017, 7:34 p.m. |
Message ID | <207763e895c7d24885df.1488569660@dev111.prn1.facebook.com> |
Download | mbox | patch |
Permalink | /patch/18904/ |
State | Accepted |
Delegated to: | Martin von Zweigbergk |
Headers | show |
Comments
On Fri, Mar 3, 2017 at 11:34 AM, Durham Goode <durham@fb.com> wrote: > # HG changeset patch > # User Durham Goode <durham@fb.com> > # Date 1488569391 28800 > # Fri Mar 03 11:29:51 2017 -0800 > # Node ID 207763e895c7d24885df22f5b9c0df5494d77daf > # Parent 78e0fb2bd1bc972444ae08e7b7165da66cbf53a3 > context: remove uses of manifest.matches > > This removes the uses of manifest.matches in context.py in favor of the new api. > This is part of removing manifest.matches since it is O(manifest). > > diff --git a/mercurial/context.py b/mercurial/context.py > --- a/mercurial/context.py > +++ b/mercurial/context.py > @@ -18,7 +18,6 @@ from .node import ( > bin, > hex, > modifiednodeid, > - newnodeid, > nullid, > nullrev, > short, > @@ -91,15 +90,6 @@ class basectx(object): > def __iter__(self): > return iter(self._manifest) > > - def _manifestmatches(self, match, s): > - """generate a new manifest filtered by the match argument > - > - This method is for internal use only and mainly exists to provide an > - object oriented way for other contexts to customize the manifest > - generation. > - """ > - return self.manifest().matches(match) > - > def _matchstatus(self, other, match): > """return match.always if match is none > > @@ -119,15 +109,15 @@ class basectx(object): > # delta application. > if self.rev() is not None and self.rev() < other.rev(): > self.manifest() > - mf1 = other._manifestmatches(match, s) > - mf2 = self._manifestmatches(match, s) > + mf1 = other.manifest() > + mf2 = self.manifest() > > modified, added = [], [] > removed = [] > clean = [] > deleted, unknown, ignored = s.deleted, s.unknown, s.ignored > deletedset = set(deleted) > - d = mf1.diff(mf2, clean=listclean) > + d = mf1.diff(mf2, match=match, clean=listclean) > for fn, value in d.iteritems(): > if fn in deletedset: > continue > @@ -154,8 +144,10 @@ class basectx(object): > > if removed: > # need to filter files if they are already reported as removed > - unknown = [fn for fn in unknown if fn not in mf1] > - ignored = [fn for fn in ignored if fn not in mf1] > + unknown = [fn for fn in unknown if fn not in mf1 and > + (not match or match(fn))] > + ignored = [fn for fn in ignored if fn not in mf1 and > + (not match or match(fn))] > # if they're deleted, don't report them as removed > removed = [fn for fn in removed if fn not in deletedset] > > @@ -1608,22 +1600,6 @@ class workingctx(committablectx): > pass > return modified, fixup > > - def _manifestmatches(self, match, s): > - """Slow path for workingctx > - > - The fast path is when we compare the working directory to its parent > - which means this function is comparing with a non-parent; therefore we > - need to build a manifest and return what matches. > - """ > - mf = self._repo['.']._manifestmatches(match, s) > - for f in s.modified + s.added: > - mf[f] = newnodeid > - mf.setflag(f, self.flags(f)) > - for f in s.removed: > - if f in mf: > - del mf[f] > - return mf > - Nice! But could you explain why this is no longer needed? Is it completely thanks to the new manifest.diff() API or something else? > def _dirstatestatus(self, match=None, ignored=False, clean=False, > unknown=False): > '''Gets the status from the dirstate -- internal use only.''' > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
On 3/7/17 2:40 PM, Martin von Zweigbergk wrote: > On Fri, Mar 3, 2017 at 11:34 AM, Durham Goode <durham@fb.com> wrote: >> # HG changeset patch >> # User Durham Goode <durham@fb.com> >> # Date 1488569391 28800 >> # Fri Mar 03 11:29:51 2017 -0800 >> # Node ID 207763e895c7d24885df22f5b9c0df5494d77daf >> # Parent 78e0fb2bd1bc972444ae08e7b7165da66cbf53a3 >> context: remove uses of manifest.matches >> >> This removes the uses of manifest.matches in context.py in favor of the new api. >> This is part of removing manifest.matches since it is O(manifest). >> >> diff --git a/mercurial/context.py b/mercurial/context.py >> --- a/mercurial/context.py >> +++ b/mercurial/context.py >> @@ -18,7 +18,6 @@ from .node import ( >> bin, >> hex, >> modifiednodeid, >> - newnodeid, >> nullid, >> nullrev, >> short, >> @@ -91,15 +90,6 @@ class basectx(object): >> def __iter__(self): >> return iter(self._manifest) >> >> - def _manifestmatches(self, match, s): >> - """generate a new manifest filtered by the match argument >> - >> - This method is for internal use only and mainly exists to provide an >> - object oriented way for other contexts to customize the manifest >> - generation. >> - """ >> - return self.manifest().matches(match) >> - >> def _matchstatus(self, other, match): >> """return match.always if match is none >> >> @@ -119,15 +109,15 @@ class basectx(object): >> # delta application. >> if self.rev() is not None and self.rev() < other.rev(): >> self.manifest() >> - mf1 = other._manifestmatches(match, s) >> - mf2 = self._manifestmatches(match, s) >> + mf1 = other.manifest() >> + mf2 = self.manifest() >> >> modified, added = [], [] >> removed = [] >> clean = [] >> deleted, unknown, ignored = s.deleted, s.unknown, s.ignored >> deletedset = set(deleted) >> - d = mf1.diff(mf2, clean=listclean) >> + d = mf1.diff(mf2, match=match, clean=listclean) >> for fn, value in d.iteritems(): >> if fn in deletedset: >> continue >> @@ -154,8 +144,10 @@ class basectx(object): >> >> if removed: >> # need to filter files if they are already reported as removed >> - unknown = [fn for fn in unknown if fn not in mf1] >> - ignored = [fn for fn in ignored if fn not in mf1] >> + unknown = [fn for fn in unknown if fn not in mf1 and >> + (not match or match(fn))] >> + ignored = [fn for fn in ignored if fn not in mf1 and >> + (not match or match(fn))] >> # if they're deleted, don't report them as removed >> removed = [fn for fn in removed if fn not in deletedset] >> >> @@ -1608,22 +1600,6 @@ class workingctx(committablectx): >> pass >> return modified, fixup >> >> - def _manifestmatches(self, match, s): >> - """Slow path for workingctx >> - >> - The fast path is when we compare the working directory to its parent >> - which means this function is comparing with a non-parent; therefore we >> - need to build a manifest and return what matches. >> - """ >> - mf = self._repo['.']._manifestmatches(match, s) >> - for f in s.modified + s.added: >> - mf[f] = newnodeid >> - mf.setflag(f, self.flags(f)) >> - for f in s.removed: >> - if f in mf: >> - del mf[f] >> - return mf >> - > > Nice! But could you explain why this is no longer needed? Is it > completely thanks to the new manifest.diff() API or something else? My original reason was because self._manifest() could already generate this manifest (and does a more accurate job of it than this function), and since the matcher was moved to the diff layer, nothing here would be lost by switching to self._manifest(). Upon looking deeper, it looks like the big benefit here is that the status is provided, and therefore we don't have to call status a second time, (i.e. when self._manifest calls 'self._status()'). This has the additional benefit that the matcher can be applied to the status output early. But this only applies when diffing the working copy against a commit that is not the parent of the working copy, so a somewhat niche case. I have some ideas about how I could refactor this area to address this, so I'll include them in V2. It might make the series 10-12 patches long though.
On Tue, Mar 7, 2017 at 4:13 PM, Durham Goode <durham@fb.com> wrote: > > > On 3/7/17 2:40 PM, Martin von Zweigbergk wrote: >> >> On Fri, Mar 3, 2017 at 11:34 AM, Durham Goode <durham@fb.com> wrote: >>> >>> # HG changeset patch >>> # User Durham Goode <durham@fb.com> >>> # Date 1488569391 28800 >>> # Fri Mar 03 11:29:51 2017 -0800 >>> # Node ID 207763e895c7d24885df22f5b9c0df5494d77daf >>> # Parent 78e0fb2bd1bc972444ae08e7b7165da66cbf53a3 >>> context: remove uses of manifest.matches >>> >>> This removes the uses of manifest.matches in context.py in favor of the >>> new api. >>> This is part of removing manifest.matches since it is O(manifest). >>> >>> diff --git a/mercurial/context.py b/mercurial/context.py >>> --- a/mercurial/context.py >>> +++ b/mercurial/context.py >>> @@ -18,7 +18,6 @@ from .node import ( >>> bin, >>> hex, >>> modifiednodeid, >>> - newnodeid, >>> nullid, >>> nullrev, >>> short, >>> @@ -91,15 +90,6 @@ class basectx(object): >>> def __iter__(self): >>> return iter(self._manifest) >>> >>> - def _manifestmatches(self, match, s): >>> - """generate a new manifest filtered by the match argument >>> - >>> - This method is for internal use only and mainly exists to >>> provide an >>> - object oriented way for other contexts to customize the manifest >>> - generation. >>> - """ >>> - return self.manifest().matches(match) >>> - >>> def _matchstatus(self, other, match): >>> """return match.always if match is none >>> >>> @@ -119,15 +109,15 @@ class basectx(object): >>> # delta application. >>> if self.rev() is not None and self.rev() < other.rev(): >>> self.manifest() >>> - mf1 = other._manifestmatches(match, s) >>> - mf2 = self._manifestmatches(match, s) >>> + mf1 = other.manifest() >>> + mf2 = self.manifest() >>> >>> modified, added = [], [] >>> removed = [] >>> clean = [] >>> deleted, unknown, ignored = s.deleted, s.unknown, s.ignored >>> deletedset = set(deleted) >>> - d = mf1.diff(mf2, clean=listclean) >>> + d = mf1.diff(mf2, match=match, clean=listclean) >>> for fn, value in d.iteritems(): >>> if fn in deletedset: >>> continue >>> @@ -154,8 +144,10 @@ class basectx(object): >>> >>> if removed: >>> # need to filter files if they are already reported as >>> removed >>> - unknown = [fn for fn in unknown if fn not in mf1] >>> - ignored = [fn for fn in ignored if fn not in mf1] >>> + unknown = [fn for fn in unknown if fn not in mf1 and >>> + (not match or match(fn))] >>> + ignored = [fn for fn in ignored if fn not in mf1 and >>> + (not match or match(fn))] >>> # if they're deleted, don't report them as removed >>> removed = [fn for fn in removed if fn not in deletedset] >>> >>> @@ -1608,22 +1600,6 @@ class workingctx(committablectx): >>> pass >>> return modified, fixup >>> >>> - def _manifestmatches(self, match, s): >>> - """Slow path for workingctx >>> - >>> - The fast path is when we compare the working directory to its >>> parent >>> - which means this function is comparing with a non-parent; >>> therefore we >>> - need to build a manifest and return what matches. >>> - """ >>> - mf = self._repo['.']._manifestmatches(match, s) >>> - for f in s.modified + s.added: >>> - mf[f] = newnodeid >>> - mf.setflag(f, self.flags(f)) >>> - for f in s.removed: >>> - if f in mf: >>> - del mf[f] >>> - return mf >>> - >> >> >> Nice! But could you explain why this is no longer needed? Is it >> completely thanks to the new manifest.diff() API or something else? > > > My original reason was because self._manifest() could already generate this > manifest (and does a more accurate job of it than this function), and since > the matcher was moved to the diff layer, nothing here would be lost by > switching to self._manifest(). > > Upon looking deeper, it looks like the big benefit here is that the status > is provided, and therefore we don't have to call status a second time, (i.e. > when self._manifest calls 'self._status()'). This has the additional benefit > that the matcher can be applied to the status output early. But this only > applies when diffing the working copy against a commit that is not the > parent of the working copy, so a somewhat niche case. Thanks for checking. > > I have some ideas about how I could refactor this area to address this, so > I'll include them in V2. It might make the series 10-12 patches long though. That sounds fine to me.
Patch
diff --git a/mercurial/context.py b/mercurial/context.py --- a/mercurial/context.py +++ b/mercurial/context.py @@ -18,7 +18,6 @@ from .node import ( bin, hex, modifiednodeid, - newnodeid, nullid, nullrev, short, @@ -91,15 +90,6 @@ class basectx(object): def __iter__(self): return iter(self._manifest) - def _manifestmatches(self, match, s): - """generate a new manifest filtered by the match argument - - This method is for internal use only and mainly exists to provide an - object oriented way for other contexts to customize the manifest - generation. - """ - return self.manifest().matches(match) - def _matchstatus(self, other, match): """return match.always if match is none @@ -119,15 +109,15 @@ class basectx(object): # delta application. if self.rev() is not None and self.rev() < other.rev(): self.manifest() - mf1 = other._manifestmatches(match, s) - mf2 = self._manifestmatches(match, s) + mf1 = other.manifest() + mf2 = self.manifest() modified, added = [], [] removed = [] clean = [] deleted, unknown, ignored = s.deleted, s.unknown, s.ignored deletedset = set(deleted) - d = mf1.diff(mf2, clean=listclean) + d = mf1.diff(mf2, match=match, clean=listclean) for fn, value in d.iteritems(): if fn in deletedset: continue @@ -154,8 +144,10 @@ class basectx(object): if removed: # need to filter files if they are already reported as removed - unknown = [fn for fn in unknown if fn not in mf1] - ignored = [fn for fn in ignored if fn not in mf1] + unknown = [fn for fn in unknown if fn not in mf1 and + (not match or match(fn))] + ignored = [fn for fn in ignored if fn not in mf1 and + (not match or match(fn))] # if they're deleted, don't report them as removed removed = [fn for fn in removed if fn not in deletedset] @@ -1608,22 +1600,6 @@ class workingctx(committablectx): pass return modified, fixup - def _manifestmatches(self, match, s): - """Slow path for workingctx - - The fast path is when we compare the working directory to its parent - which means this function is comparing with a non-parent; therefore we - need to build a manifest and return what matches. - """ - mf = self._repo['.']._manifestmatches(match, s) - for f in s.modified + s.added: - mf[f] = newnodeid - mf.setflag(f, self.flags(f)) - for f in s.removed: - if f in mf: - del mf[f] - return mf - def _dirstatestatus(self, match=None, ignored=False, clean=False, unknown=False): '''Gets the status from the dirstate -- internal use only.'''