Patchwork [01,of,10] contrib: have import-checker work mostly with native strings for mod names

login
register
mail settings
Submitter Augie Fackler
Date Aug. 23, 2017, 2:55 p.m.
Message ID <e915b9703f675b2f76c5.1503500100@imladris.local>
Download mbox | patch
Permalink /patch/23232/
State Accepted
Headers show

Comments

Augie Fackler - Aug. 23, 2017, 2:55 p.m.
# HG changeset patch
# User Augie Fackler <raf@durin42.com>
# Date 1503421161 14400
#      Tue Aug 22 12:59:21 2017 -0400
# Node ID e915b9703f675b2f76c512347ddff0f6c65a9748
# Parent  edf503e5dfd408f900f3bad0a6923573813e276b
contrib: have import-checker work mostly with native strings for mod names

Module names are a bit awkward to deal with portably otherwise.
via Mercurial-devel - Aug. 28, 2017, 6:55 a.m.
On Wed, Aug 23, 2017 at 7:55 AM, Augie Fackler <raf@durin42.com> wrote:
> # HG changeset patch
> # User Augie Fackler <raf@durin42.com>
> # Date 1503421161 14400
> #      Tue Aug 22 12:59:21 2017 -0400
> # Node ID e915b9703f675b2f76c512347ddff0f6c65a9748
> # Parent  edf503e5dfd408f900f3bad0a6923573813e276b
> contrib: have import-checker work mostly with native strings for mod names
>
> Module names are a bit awkward to deal with portably otherwise.
>
> diff --git a/contrib/import-checker.py b/contrib/import-checker.py
> --- a/contrib/import-checker.py
> +++ b/contrib/import-checker.py
> @@ -147,6 +147,8 @@ def fromlocalfunc(modulename, localmods)
>      >>> fromlocal2('bar', 2)
>      ('foo.bar', 'foo.bar.__init__', True)
>      """
> +    if not isinstance(modulename, str):
> +        modulename = modulename.decode('ascii')

Should this be 'if isinstance(modulename, bytes)' instead? That seems
clearer to me. Also, 'abc'.decode('ascii') seems to fail on Py3. Is
that a concern?

>      prefix = '.'.join(modulename.split('.')[:-1])
>      if prefix:
>          prefix += '.'
> @@ -406,6 +408,8 @@ def verify_modern_convention(module, roo
>      * Certain modules must be aliased to alternate names to avoid aliasing
>        and readability problems. See `requirealias`.
>      """
> +    if not isinstance(module, str):
> +        module = module.decode('ascii')
>      topmodule = module.split('.')[0]
>      fromlocal = fromlocalfunc(module, localmods)
>
> @@ -724,6 +728,9 @@ def main(argv):
>          localmodpaths[modname] = source_path
>      localmods = populateextmods(localmodpaths)
>      for localmodname, source_path in sorted(localmodpaths.items()):
> +        if not isinstance(localmodname, bytes):
> +            # This is only safe because all hg's files are ascii
> +            localmodname = localmodname.encode('ascii')
>          for src, modname, name, line in sources(source_path, localmodname):
>              try:
>                  used_imports[modname] = sorted(
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Augie Fackler - Aug. 28, 2017, 1:57 p.m.
> On Aug 28, 2017, at 02:55, Martin von Zweigbergk <martinvonz@google.com> wrote:
> 
> On Wed, Aug 23, 2017 at 7:55 AM, Augie Fackler <raf@durin42.com> wrote:
>> # HG changeset patch
>> # User Augie Fackler <raf@durin42.com>
>> # Date 1503421161 14400
>> #      Tue Aug 22 12:59:21 2017 -0400
>> # Node ID e915b9703f675b2f76c512347ddff0f6c65a9748
>> # Parent  edf503e5dfd408f900f3bad0a6923573813e276b
>> contrib: have import-checker work mostly with native strings for mod names
>> 
>> Module names are a bit awkward to deal with portably otherwise.
>> 
>> diff --git a/contrib/import-checker.py b/contrib/import-checker.py
>> --- a/contrib/import-checker.py
>> +++ b/contrib/import-checker.py
>> @@ -147,6 +147,8 @@ def fromlocalfunc(modulename, localmods)
>>>>> fromlocal2('bar', 2)
>>     ('foo.bar', 'foo.bar.__init__', True)
>>     """
>> +    if not isinstance(modulename, str):
>> +        modulename = modulename.decode('ascii')
> 
> Should this be 'if isinstance(modulename, bytes)' instead? That seems
> clearer to me.

Clearer, but wrong, because str == bytes on Python 2 and you'd end up decoding to a unicode rather than to a native string.

> Also, 'abc'.decode('ascii') seems to fail on Py3. Is
> that a concern?

No, because 'abc' is instance str, so you wouldn't evaluate the body of the if statement.

> 
>>     prefix = '.'.join(modulename.split('.')[:-1])
>>     if prefix:
>>         prefix += '.'
>> @@ -406,6 +408,8 @@ def verify_modern_convention(module, roo
>>     * Certain modules must be aliased to alternate names to avoid aliasing
>>       and readability problems. See `requirealias`.
>>     """
>> +    if not isinstance(module, str):
>> +        module = module.decode('ascii')
>>     topmodule = module.split('.')[0]
>>     fromlocal = fromlocalfunc(module, localmods)
>> 
>> @@ -724,6 +728,9 @@ def main(argv):
>>         localmodpaths[modname] = source_path
>>     localmods = populateextmods(localmodpaths)
>>     for localmodname, source_path in sorted(localmodpaths.items()):
>> +        if not isinstance(localmodname, bytes):
>> +            # This is only safe because all hg's files are ascii
>> +            localmodname = localmodname.encode('ascii')
>>         for src, modname, name, line in sources(source_path, localmodname):
>>             try:
>>                 used_imports[modname] = sorted(
>> _______________________________________________
>> Mercurial-devel mailing list
>> Mercurial-devel@mercurial-scm.org
>> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
via Mercurial-devel - Aug. 28, 2017, 4:50 p.m.
On Mon, Aug 28, 2017 at 6:57 AM, Augie Fackler <raf@durin42.com> wrote:
>
>> On Aug 28, 2017, at 02:55, Martin von Zweigbergk <martinvonz@google.com> wrote:
>>
>> On Wed, Aug 23, 2017 at 7:55 AM, Augie Fackler <raf@durin42.com> wrote:
>>> # HG changeset patch
>>> # User Augie Fackler <raf@durin42.com>
>>> # Date 1503421161 14400
>>> #      Tue Aug 22 12:59:21 2017 -0400
>>> # Node ID e915b9703f675b2f76c512347ddff0f6c65a9748
>>> # Parent  edf503e5dfd408f900f3bad0a6923573813e276b
>>> contrib: have import-checker work mostly with native strings for mod names
>>>
>>> Module names are a bit awkward to deal with portably otherwise.
>>>
>>> diff --git a/contrib/import-checker.py b/contrib/import-checker.py
>>> --- a/contrib/import-checker.py
>>> +++ b/contrib/import-checker.py
>>> @@ -147,6 +147,8 @@ def fromlocalfunc(modulename, localmods)
>>>>>> fromlocal2('bar', 2)
>>>     ('foo.bar', 'foo.bar.__init__', True)
>>>     """
>>> +    if not isinstance(modulename, str):
>>> +        modulename = modulename.decode('ascii')
>>
>> Should this be 'if isinstance(modulename, bytes)' instead? That seems
>> clearer to me.
>
> Clearer, but wrong, because str == bytes on Python 2 and you'd end up decoding to a unicode rather than to a native string.

Ah, I think I had gotten this backwards, thinking this was converting
*to* bytes, so my comments didn't make much sense. Sorry about the
noise.

Patch

diff --git a/contrib/import-checker.py b/contrib/import-checker.py
--- a/contrib/import-checker.py
+++ b/contrib/import-checker.py
@@ -147,6 +147,8 @@  def fromlocalfunc(modulename, localmods)
     >>> fromlocal2('bar', 2)
     ('foo.bar', 'foo.bar.__init__', True)
     """
+    if not isinstance(modulename, str):
+        modulename = modulename.decode('ascii')
     prefix = '.'.join(modulename.split('.')[:-1])
     if prefix:
         prefix += '.'
@@ -406,6 +408,8 @@  def verify_modern_convention(module, roo
     * Certain modules must be aliased to alternate names to avoid aliasing
       and readability problems. See `requirealias`.
     """
+    if not isinstance(module, str):
+        module = module.decode('ascii')
     topmodule = module.split('.')[0]
     fromlocal = fromlocalfunc(module, localmods)
 
@@ -724,6 +728,9 @@  def main(argv):
         localmodpaths[modname] = source_path
     localmods = populateextmods(localmodpaths)
     for localmodname, source_path in sorted(localmodpaths.items()):
+        if not isinstance(localmodname, bytes):
+            # This is only safe because all hg's files are ascii
+            localmodname = localmodname.encode('ascii')
         for src, modname, name, line in sources(source_path, localmodname):
             try:
                 used_imports[modname] = sorted(