Patchwork [5,of,6] context: remove uses of manifest.matches

login
register
mail settings
Submitter Durham Goode
Date March 3, 2017, 7:34 p.m.
Message ID <207763e895c7d24885df.1488569660@dev111.prn1.facebook.com>
Download mbox | patch
Permalink /patch/18904/
State Accepted
Delegated to: Martin von Zweigbergk
Headers show

Comments

Durham Goode - March 3, 2017, 7:34 p.m.
# HG changeset patch
# User Durham Goode <durham@fb.com>
# Date 1488569391 28800
#      Fri Mar 03 11:29:51 2017 -0800
# Node ID 207763e895c7d24885df22f5b9c0df5494d77daf
# Parent  78e0fb2bd1bc972444ae08e7b7165da66cbf53a3
context: remove uses of manifest.matches

This removes the uses of manifest.matches in context.py in favor of the new api.
This is part of removing manifest.matches since it is O(manifest).
via Mercurial-devel - March 7, 2017, 10:40 p.m.
On Fri, Mar 3, 2017 at 11:34 AM, Durham Goode <durham@fb.com> wrote:
> # HG changeset patch
> # User Durham Goode <durham@fb.com>
> # Date 1488569391 28800
> #      Fri Mar 03 11:29:51 2017 -0800
> # Node ID 207763e895c7d24885df22f5b9c0df5494d77daf
> # Parent  78e0fb2bd1bc972444ae08e7b7165da66cbf53a3
> context: remove uses of manifest.matches
>
> This removes the uses of manifest.matches in context.py in favor of the new api.
> This is part of removing manifest.matches since it is O(manifest).
>
> diff --git a/mercurial/context.py b/mercurial/context.py
> --- a/mercurial/context.py
> +++ b/mercurial/context.py
> @@ -18,7 +18,6 @@ from .node import (
>      bin,
>      hex,
>      modifiednodeid,
> -    newnodeid,
>      nullid,
>      nullrev,
>      short,
> @@ -91,15 +90,6 @@ class basectx(object):
>      def __iter__(self):
>          return iter(self._manifest)
>
> -    def _manifestmatches(self, match, s):
> -        """generate a new manifest filtered by the match argument
> -
> -        This method is for internal use only and mainly exists to provide an
> -        object oriented way for other contexts to customize the manifest
> -        generation.
> -        """
> -        return self.manifest().matches(match)
> -
>      def _matchstatus(self, other, match):
>          """return match.always if match is none
>
> @@ -119,15 +109,15 @@ class basectx(object):
>          # delta application.
>          if self.rev() is not None and self.rev() < other.rev():
>              self.manifest()
> -        mf1 = other._manifestmatches(match, s)
> -        mf2 = self._manifestmatches(match, s)
> +        mf1 = other.manifest()
> +        mf2 = self.manifest()
>
>          modified, added = [], []
>          removed = []
>          clean = []
>          deleted, unknown, ignored = s.deleted, s.unknown, s.ignored
>          deletedset = set(deleted)
> -        d = mf1.diff(mf2, clean=listclean)
> +        d = mf1.diff(mf2, match=match, clean=listclean)
>          for fn, value in d.iteritems():
>              if fn in deletedset:
>                  continue
> @@ -154,8 +144,10 @@ class basectx(object):
>
>          if removed:
>              # need to filter files if they are already reported as removed
> -            unknown = [fn for fn in unknown if fn not in mf1]
> -            ignored = [fn for fn in ignored if fn not in mf1]
> +            unknown = [fn for fn in unknown if fn not in mf1 and
> +                                               (not match or match(fn))]
> +            ignored = [fn for fn in ignored if fn not in mf1 and
> +                                               (not match or match(fn))]
>              # if they're deleted, don't report them as removed
>              removed = [fn for fn in removed if fn not in deletedset]
>
> @@ -1608,22 +1600,6 @@ class workingctx(committablectx):
>                  pass
>          return modified, fixup
>
> -    def _manifestmatches(self, match, s):
> -        """Slow path for workingctx
> -
> -        The fast path is when we compare the working directory to its parent
> -        which means this function is comparing with a non-parent; therefore we
> -        need to build a manifest and return what matches.
> -        """
> -        mf = self._repo['.']._manifestmatches(match, s)
> -        for f in s.modified + s.added:
> -            mf[f] = newnodeid
> -            mf.setflag(f, self.flags(f))
> -        for f in s.removed:
> -            if f in mf:
> -                del mf[f]
> -        return mf
> -

Nice! But could you explain why this is no longer needed? Is it
completely thanks to the new manifest.diff() API or something else?

>      def _dirstatestatus(self, match=None, ignored=False, clean=False,
>                          unknown=False):
>          '''Gets the status from the dirstate -- internal use only.'''
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Durham Goode - March 8, 2017, 12:13 a.m.
On 3/7/17 2:40 PM, Martin von Zweigbergk wrote:
> On Fri, Mar 3, 2017 at 11:34 AM, Durham Goode <durham@fb.com> wrote:
>> # HG changeset patch
>> # User Durham Goode <durham@fb.com>
>> # Date 1488569391 28800
>> #      Fri Mar 03 11:29:51 2017 -0800
>> # Node ID 207763e895c7d24885df22f5b9c0df5494d77daf
>> # Parent  78e0fb2bd1bc972444ae08e7b7165da66cbf53a3
>> context: remove uses of manifest.matches
>>
>> This removes the uses of manifest.matches in context.py in favor of the new api.
>> This is part of removing manifest.matches since it is O(manifest).
>>
>> diff --git a/mercurial/context.py b/mercurial/context.py
>> --- a/mercurial/context.py
>> +++ b/mercurial/context.py
>> @@ -18,7 +18,6 @@ from .node import (
>>      bin,
>>      hex,
>>      modifiednodeid,
>> -    newnodeid,
>>      nullid,
>>      nullrev,
>>      short,
>> @@ -91,15 +90,6 @@ class basectx(object):
>>      def __iter__(self):
>>          return iter(self._manifest)
>>
>> -    def _manifestmatches(self, match, s):
>> -        """generate a new manifest filtered by the match argument
>> -
>> -        This method is for internal use only and mainly exists to provide an
>> -        object oriented way for other contexts to customize the manifest
>> -        generation.
>> -        """
>> -        return self.manifest().matches(match)
>> -
>>      def _matchstatus(self, other, match):
>>          """return match.always if match is none
>>
>> @@ -119,15 +109,15 @@ class basectx(object):
>>          # delta application.
>>          if self.rev() is not None and self.rev() < other.rev():
>>              self.manifest()
>> -        mf1 = other._manifestmatches(match, s)
>> -        mf2 = self._manifestmatches(match, s)
>> +        mf1 = other.manifest()
>> +        mf2 = self.manifest()
>>
>>          modified, added = [], []
>>          removed = []
>>          clean = []
>>          deleted, unknown, ignored = s.deleted, s.unknown, s.ignored
>>          deletedset = set(deleted)
>> -        d = mf1.diff(mf2, clean=listclean)
>> +        d = mf1.diff(mf2, match=match, clean=listclean)
>>          for fn, value in d.iteritems():
>>              if fn in deletedset:
>>                  continue
>> @@ -154,8 +144,10 @@ class basectx(object):
>>
>>          if removed:
>>              # need to filter files if they are already reported as removed
>> -            unknown = [fn for fn in unknown if fn not in mf1]
>> -            ignored = [fn for fn in ignored if fn not in mf1]
>> +            unknown = [fn for fn in unknown if fn not in mf1 and
>> +                                               (not match or match(fn))]
>> +            ignored = [fn for fn in ignored if fn not in mf1 and
>> +                                               (not match or match(fn))]
>>              # if they're deleted, don't report them as removed
>>              removed = [fn for fn in removed if fn not in deletedset]
>>
>> @@ -1608,22 +1600,6 @@ class workingctx(committablectx):
>>                  pass
>>          return modified, fixup
>>
>> -    def _manifestmatches(self, match, s):
>> -        """Slow path for workingctx
>> -
>> -        The fast path is when we compare the working directory to its parent
>> -        which means this function is comparing with a non-parent; therefore we
>> -        need to build a manifest and return what matches.
>> -        """
>> -        mf = self._repo['.']._manifestmatches(match, s)
>> -        for f in s.modified + s.added:
>> -            mf[f] = newnodeid
>> -            mf.setflag(f, self.flags(f))
>> -        for f in s.removed:
>> -            if f in mf:
>> -                del mf[f]
>> -        return mf
>> -
>
> Nice! But could you explain why this is no longer needed? Is it
> completely thanks to the new manifest.diff() API or something else?

My original reason was because self._manifest() could already generate 
this manifest (and does a more accurate job of it than this function), 
and since the matcher was moved to the diff layer, nothing here would be 
lost by switching to self._manifest().

Upon looking deeper, it looks like the big benefit here is that the 
status is provided, and therefore we don't have to call status a second 
time, (i.e. when self._manifest calls 'self._status()'). This has the 
additional benefit that the matcher can be applied to the status output 
early.  But this only applies when diffing the working copy against a 
commit that is not the parent of the working copy, so a somewhat niche case.

I have some ideas about how I could refactor this area to address this, 
so I'll include them in V2. It might make the series 10-12 patches long 
though.
via Mercurial-devel - March 8, 2017, 12:45 a.m.
On Tue, Mar 7, 2017 at 4:13 PM, Durham Goode <durham@fb.com> wrote:
>
>
> On 3/7/17 2:40 PM, Martin von Zweigbergk wrote:
>>
>> On Fri, Mar 3, 2017 at 11:34 AM, Durham Goode <durham@fb.com> wrote:
>>>
>>> # HG changeset patch
>>> # User Durham Goode <durham@fb.com>
>>> # Date 1488569391 28800
>>> #      Fri Mar 03 11:29:51 2017 -0800
>>> # Node ID 207763e895c7d24885df22f5b9c0df5494d77daf
>>> # Parent  78e0fb2bd1bc972444ae08e7b7165da66cbf53a3
>>> context: remove uses of manifest.matches
>>>
>>> This removes the uses of manifest.matches in context.py in favor of the
>>> new api.
>>> This is part of removing manifest.matches since it is O(manifest).
>>>
>>> diff --git a/mercurial/context.py b/mercurial/context.py
>>> --- a/mercurial/context.py
>>> +++ b/mercurial/context.py
>>> @@ -18,7 +18,6 @@ from .node import (
>>>      bin,
>>>      hex,
>>>      modifiednodeid,
>>> -    newnodeid,
>>>      nullid,
>>>      nullrev,
>>>      short,
>>> @@ -91,15 +90,6 @@ class basectx(object):
>>>      def __iter__(self):
>>>          return iter(self._manifest)
>>>
>>> -    def _manifestmatches(self, match, s):
>>> -        """generate a new manifest filtered by the match argument
>>> -
>>> -        This method is for internal use only and mainly exists to
>>> provide an
>>> -        object oriented way for other contexts to customize the manifest
>>> -        generation.
>>> -        """
>>> -        return self.manifest().matches(match)
>>> -
>>>      def _matchstatus(self, other, match):
>>>          """return match.always if match is none
>>>
>>> @@ -119,15 +109,15 @@ class basectx(object):
>>>          # delta application.
>>>          if self.rev() is not None and self.rev() < other.rev():
>>>              self.manifest()
>>> -        mf1 = other._manifestmatches(match, s)
>>> -        mf2 = self._manifestmatches(match, s)
>>> +        mf1 = other.manifest()
>>> +        mf2 = self.manifest()
>>>
>>>          modified, added = [], []
>>>          removed = []
>>>          clean = []
>>>          deleted, unknown, ignored = s.deleted, s.unknown, s.ignored
>>>          deletedset = set(deleted)
>>> -        d = mf1.diff(mf2, clean=listclean)
>>> +        d = mf1.diff(mf2, match=match, clean=listclean)
>>>          for fn, value in d.iteritems():
>>>              if fn in deletedset:
>>>                  continue
>>> @@ -154,8 +144,10 @@ class basectx(object):
>>>
>>>          if removed:
>>>              # need to filter files if they are already reported as
>>> removed
>>> -            unknown = [fn for fn in unknown if fn not in mf1]
>>> -            ignored = [fn for fn in ignored if fn not in mf1]
>>> +            unknown = [fn for fn in unknown if fn not in mf1 and
>>> +                                               (not match or match(fn))]
>>> +            ignored = [fn for fn in ignored if fn not in mf1 and
>>> +                                               (not match or match(fn))]
>>>              # if they're deleted, don't report them as removed
>>>              removed = [fn for fn in removed if fn not in deletedset]
>>>
>>> @@ -1608,22 +1600,6 @@ class workingctx(committablectx):
>>>                  pass
>>>          return modified, fixup
>>>
>>> -    def _manifestmatches(self, match, s):
>>> -        """Slow path for workingctx
>>> -
>>> -        The fast path is when we compare the working directory to its
>>> parent
>>> -        which means this function is comparing with a non-parent;
>>> therefore we
>>> -        need to build a manifest and return what matches.
>>> -        """
>>> -        mf = self._repo['.']._manifestmatches(match, s)
>>> -        for f in s.modified + s.added:
>>> -            mf[f] = newnodeid
>>> -            mf.setflag(f, self.flags(f))
>>> -        for f in s.removed:
>>> -            if f in mf:
>>> -                del mf[f]
>>> -        return mf
>>> -
>>
>>
>> Nice! But could you explain why this is no longer needed? Is it
>> completely thanks to the new manifest.diff() API or something else?
>
>
> My original reason was because self._manifest() could already generate this
> manifest (and does a more accurate job of it than this function), and since
> the matcher was moved to the diff layer, nothing here would be lost by
> switching to self._manifest().
>
> Upon looking deeper, it looks like the big benefit here is that the status
> is provided, and therefore we don't have to call status a second time, (i.e.
> when self._manifest calls 'self._status()'). This has the additional benefit
> that the matcher can be applied to the status output early.  But this only
> applies when diffing the working copy against a commit that is not the
> parent of the working copy, so a somewhat niche case.

Thanks for checking.

>
> I have some ideas about how I could refactor this area to address this, so
> I'll include them in V2. It might make the series 10-12 patches long though.

That sounds fine to me.

Patch

diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -18,7 +18,6 @@  from .node import (
     bin,
     hex,
     modifiednodeid,
-    newnodeid,
     nullid,
     nullrev,
     short,
@@ -91,15 +90,6 @@  class basectx(object):
     def __iter__(self):
         return iter(self._manifest)
 
-    def _manifestmatches(self, match, s):
-        """generate a new manifest filtered by the match argument
-
-        This method is for internal use only and mainly exists to provide an
-        object oriented way for other contexts to customize the manifest
-        generation.
-        """
-        return self.manifest().matches(match)
-
     def _matchstatus(self, other, match):
         """return match.always if match is none
 
@@ -119,15 +109,15 @@  class basectx(object):
         # delta application.
         if self.rev() is not None and self.rev() < other.rev():
             self.manifest()
-        mf1 = other._manifestmatches(match, s)
-        mf2 = self._manifestmatches(match, s)
+        mf1 = other.manifest()
+        mf2 = self.manifest()
 
         modified, added = [], []
         removed = []
         clean = []
         deleted, unknown, ignored = s.deleted, s.unknown, s.ignored
         deletedset = set(deleted)
-        d = mf1.diff(mf2, clean=listclean)
+        d = mf1.diff(mf2, match=match, clean=listclean)
         for fn, value in d.iteritems():
             if fn in deletedset:
                 continue
@@ -154,8 +144,10 @@  class basectx(object):
 
         if removed:
             # need to filter files if they are already reported as removed
-            unknown = [fn for fn in unknown if fn not in mf1]
-            ignored = [fn for fn in ignored if fn not in mf1]
+            unknown = [fn for fn in unknown if fn not in mf1 and
+                                               (not match or match(fn))]
+            ignored = [fn for fn in ignored if fn not in mf1 and
+                                               (not match or match(fn))]
             # if they're deleted, don't report them as removed
             removed = [fn for fn in removed if fn not in deletedset]
 
@@ -1608,22 +1600,6 @@  class workingctx(committablectx):
                 pass
         return modified, fixup
 
-    def _manifestmatches(self, match, s):
-        """Slow path for workingctx
-
-        The fast path is when we compare the working directory to its parent
-        which means this function is comparing with a non-parent; therefore we
-        need to build a manifest and return what matches.
-        """
-        mf = self._repo['.']._manifestmatches(match, s)
-        for f in s.modified + s.added:
-            mf[f] = newnodeid
-            mf.setflag(f, self.flags(f))
-        for f in s.removed:
-            if f in mf:
-                del mf[f]
-        return mf
-
     def _dirstatestatus(self, match=None, ignored=False, clean=False,
                         unknown=False):
         '''Gets the status from the dirstate -- internal use only.'''