Submitter | via Mercurial-devel |
---|---|
Date | May 22, 2017, 6:22 p.m. |
Message ID | <fa82a6f7adb3deef43da.1495477353@martinvonz.svl.corp.google.com> |
Download | mbox | patch |
Permalink | /patch/20836/ |
State | Accepted |
Headers | show |
Comments
On Mon, May 22, 2017 at 11:22:33AM -0700, Martin von Zweigbergk via Mercurial-devel wrote: > # HG changeset patch > # User Martin von Zweigbergk <martinvonz@google.com> > # Date 1495476498 25200 > # Mon May 22 11:08:18 2017 -0700 > # Node ID fa82a6f7adb3deef43dacf5059e906eed9a1beba > # Parent bdc4861ffe597d6dc0c19b57dcb98edaf5aaa89f > match: implement __repr__() and update users (API) queued, thanks
On Mon, 22 May 2017 11:22:33 -0700, Martin von Zweigbergk via Mercurial-devel wrote: > # HG changeset patch > # User Martin von Zweigbergk <martinvonz@google.com> > # Date 1495476498 25200 > # Mon May 22 11:08:18 2017 -0700 > # Node ID fa82a6f7adb3deef43dacf5059e906eed9a1beba > # Parent bdc4861ffe597d6dc0c19b57dcb98edaf5aaa89f > match: implement __repr__() and update users (API) > > fsmonitor and debugignore currently access matcher fields that I would > consider implementation details, namely patternspat, includepat, and > excludepat. Let' instead implement __repr__() and have the few users > use that instead. > > Marked (API) because the fields can now be None. > > diff --git a/hgext/fsmonitor/__init__.py b/hgext/fsmonitor/__init__.py > --- a/hgext/fsmonitor/__init__.py > +++ b/hgext/fsmonitor/__init__.py > @@ -148,19 +148,7 @@ > > """ > sha1 = hashlib.sha1() > - if util.safehasattr(ignore, 'includepat'): > - sha1.update(ignore.includepat) > - sha1.update('\0\0') > - if util.safehasattr(ignore, 'excludepat'): > - sha1.update(ignore.excludepat) > - sha1.update('\0\0') > - if util.safehasattr(ignore, 'patternspat'): > - sha1.update(ignore.patternspat) > - sha1.update('\0\0') > - if util.safehasattr(ignore, '_files'): > - for f in ignore._files: > - sha1.update(f) > - sha1.update('\0') > + sha1.update(repr(ignore)) > return sha1.hexdigest() This will cause problems on Python 3 where repr() must return a unicode string but sha1 expects bytes.
On Tue, May 23, 2017 at 5:36 AM, Yuya Nishihara <yuya@tcha.org> wrote: > On Mon, 22 May 2017 11:22:33 -0700, Martin von Zweigbergk via Mercurial-devel wrote: >> # HG changeset patch >> # User Martin von Zweigbergk <martinvonz@google.com> >> # Date 1495476498 25200 >> # Mon May 22 11:08:18 2017 -0700 >> # Node ID fa82a6f7adb3deef43dacf5059e906eed9a1beba >> # Parent bdc4861ffe597d6dc0c19b57dcb98edaf5aaa89f >> match: implement __repr__() and update users (API) >> >> fsmonitor and debugignore currently access matcher fields that I would >> consider implementation details, namely patternspat, includepat, and >> excludepat. Let' instead implement __repr__() and have the few users >> use that instead. >> >> Marked (API) because the fields can now be None. >> >> diff --git a/hgext/fsmonitor/__init__.py b/hgext/fsmonitor/__init__.py >> --- a/hgext/fsmonitor/__init__.py >> +++ b/hgext/fsmonitor/__init__.py >> @@ -148,19 +148,7 @@ >> >> """ >> sha1 = hashlib.sha1() >> - if util.safehasattr(ignore, 'includepat'): >> - sha1.update(ignore.includepat) >> - sha1.update('\0\0') >> - if util.safehasattr(ignore, 'excludepat'): >> - sha1.update(ignore.excludepat) >> - sha1.update('\0\0') >> - if util.safehasattr(ignore, 'patternspat'): >> - sha1.update(ignore.patternspat) >> - sha1.update('\0\0') >> - if util.safehasattr(ignore, '_files'): >> - for f in ignore._files: >> - sha1.update(f) >> - sha1.update('\0') >> + sha1.update(repr(ignore)) >> return sha1.hexdigest() > > This will cause problems on Python 3 where repr() must return a unicode string > but sha1 expects bytes. Good point. Since the patterns (regexes) are bytes (I think), it seems like we'd want the representation to be bytes as well. IIUC, __bytes__ was introduced in py3, so we can't use that. Should we add a custom bytes() (or bytesrepr()? or ...) or what do we do? Okay if I fix this in a followup? Augie said he'd slightly prefer that because my patch is already pretty deep down in the stack. I also have a long series built on top that will also use the __repr__ format in tests.
On Tue, 23 May 2017 16:12:27 -0700, Martin von Zweigbergk wrote: > On Tue, May 23, 2017 at 5:36 AM, Yuya Nishihara <yuya@tcha.org> wrote: > > On Mon, 22 May 2017 11:22:33 -0700, Martin von Zweigbergk via Mercurial-devel wrote: > >> # HG changeset patch > >> # User Martin von Zweigbergk <martinvonz@google.com> > >> # Date 1495476498 25200 > >> # Mon May 22 11:08:18 2017 -0700 > >> # Node ID fa82a6f7adb3deef43dacf5059e906eed9a1beba > >> # Parent bdc4861ffe597d6dc0c19b57dcb98edaf5aaa89f > >> match: implement __repr__() and update users (API) > >> > >> fsmonitor and debugignore currently access matcher fields that I would > >> consider implementation details, namely patternspat, includepat, and > >> excludepat. Let' instead implement __repr__() and have the few users > >> use that instead. > >> > >> Marked (API) because the fields can now be None. > >> > >> diff --git a/hgext/fsmonitor/__init__.py b/hgext/fsmonitor/__init__.py > >> --- a/hgext/fsmonitor/__init__.py > >> +++ b/hgext/fsmonitor/__init__.py > >> @@ -148,19 +148,7 @@ > >> > >> """ > >> sha1 = hashlib.sha1() > >> - if util.safehasattr(ignore, 'includepat'): > >> - sha1.update(ignore.includepat) > >> - sha1.update('\0\0') > >> - if util.safehasattr(ignore, 'excludepat'): > >> - sha1.update(ignore.excludepat) > >> - sha1.update('\0\0') > >> - if util.safehasattr(ignore, 'patternspat'): > >> - sha1.update(ignore.patternspat) > >> - sha1.update('\0\0') > >> - if util.safehasattr(ignore, '_files'): > >> - for f in ignore._files: > >> - sha1.update(f) > >> - sha1.update('\0') > >> + sha1.update(repr(ignore)) > >> return sha1.hexdigest() > > > > This will cause problems on Python 3 where repr() must return a unicode string > > but sha1 expects bytes. > > Good point. Since the patterns (regexes) are bytes (I think), it seems > like we'd want the representation to be bytes as well. IIUC, __bytes__ > was introduced in py3, so we can't use that. Should we add a custom > bytes() (or bytesrepr()? or ...) or what do we do? repr() seems fine for debugging output, though we'll need to convert it back to bytes (by e.g. sysbytes()) on Python 3 (and glob out b'' in tests.) I'm not a fan of using repr() as a cache key, but that's probably okay. We can encode it by sysbytes(). > Okay if I fix this in a followup? Augie said he'd slightly prefer that > because my patch is already pretty deep down in the stack. I also have > a long series built on top that will also use the __repr__ format in > tests. Yeah, this isn't a blocker anyway.
On Mon, 22 May 2017 14:22:33 -0400, Martin von Zweigbergk via Mercurial-devel <mercurial-devel@mercurial-scm.org> wrote: > # HG changeset patch > # User Martin von Zweigbergk <martinvonz@google.com> > # Date 1495476498 25200 > # Mon May 22 11:08:18 2017 -0700 > # Node ID fa82a6f7adb3deef43dacf5059e906eed9a1beba > # Parent bdc4861ffe597d6dc0c19b57dcb98edaf5aaa89f > match: implement __repr__() and update users (API) This (952017471f93) changes the documented behavior of `hg debugignore` beyond listing the matcher and its fields. For example, using the hg repo, the start of the output: (?:(?:|.*/)[^/]*\.elc(?:/|$) changes to: <includematcher includes='(?:(?:|.*/)[^/]*\\.elc(?:/|$) (note the new double backslash) I don't care one way or the other, but no tests show the backslash escaping, so I wasn't sure if that was an oversight or if the documentation should just say "don't parse this".
Patch
diff --git a/hgext/fsmonitor/__init__.py b/hgext/fsmonitor/__init__.py --- a/hgext/fsmonitor/__init__.py +++ b/hgext/fsmonitor/__init__.py @@ -148,19 +148,7 @@ """ sha1 = hashlib.sha1() - if util.safehasattr(ignore, 'includepat'): - sha1.update(ignore.includepat) - sha1.update('\0\0') - if util.safehasattr(ignore, 'excludepat'): - sha1.update(ignore.excludepat) - sha1.update('\0\0') - if util.safehasattr(ignore, 'patternspat'): - sha1.update(ignore.patternspat) - sha1.update('\0\0') - if util.safehasattr(ignore, '_files'): - for f in ignore._files: - sha1.update(f) - sha1.update('\0') + sha1.update(repr(ignore)) return sha1.hexdigest() _watchmanencoding = pywatchman.encoding.get_local_encoding() diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py --- a/mercurial/debugcommands.py +++ b/mercurial/debugcommands.py @@ -810,11 +810,7 @@ ignore = repo.dirstate._ignore if not files: # Show all the patterns - includepat = getattr(ignore, 'includepat', None) - if includepat is not None: - ui.write("%s\n" % includepat) - else: - raise error.Abort(_("no ignore patterns found")) + ui.write("%s\n" % repr(ignore)) else: for f in files: nf = util.normpath(f) diff --git a/mercurial/match.py b/mercurial/match.py --- a/mercurial/match.py +++ b/mercurial/match.py @@ -214,6 +214,9 @@ self._anypats = bool(include or exclude) self._always = False self._pathrestricted = bool(include or exclude or patterns) + self.patternspat = None + self.includepat = None + self.excludepat = None # roots are directories which are recursively included/excluded. self._includeroots = set() @@ -375,6 +378,11 @@ def prefix(self): return not self.always() and not self.isexact() and not self.anypats() + def __repr__(self): + return ('<matcher files=%r, patterns=%r, includes=%r, excludes=%r>' % + (self._files, self.patternspat, self.includepat, + self.excludepat)) + class subdirmatcher(matcher): """Adapt a matcher to work on a subdirectory only. diff --git a/tests/test-hgignore.t b/tests/test-hgignore.t --- a/tests/test-hgignore.t +++ b/tests/test-hgignore.t @@ -164,7 +164,7 @@ A b.o $ hg debugignore - (?:(?:|.*/)[^/]*(?:/|$)) + <matcher files=[], patterns=None, includes=(?:(?:|.*/)[^/]*(?:/|$)), excludes=None> $ hg debugignore b.o b.o is ignored