Patchwork [4,of,7,v2] import-checker: parse python code from .t files

login
register
mail settings
Submitter timeless@mozdev.org
Date April 13, 2016, 5:21 p.m.
Message ID <0fd09ef3b42024889053.1460568107@waste.org>
Download mbox | patch
Permalink /patch/14593/
State Accepted
Delegated to: Yuya Nishihara
Headers show

Comments

timeless@mozdev.org - April 13, 2016, 5:21 p.m.
# HG changeset patch
# User timeless <timeless@mozdev.org>
# Date 1460497436 0
#      Tue Apr 12 21:43:56 2016 +0000
# Node ID 0fd09ef3b42024889053a97b4610a5f6b009bf34
# Parent  5af048d312cd081f88706aa7b19af85083c197ec
import-checker: parse python code from .t files
Yuya Nishihara - April 15, 2016, 2:35 p.m.
On Wed, 13 Apr 2016 12:21:47 -0500, timeless wrote:
> # HG changeset patch
> # User timeless <timeless@mozdev.org>
> # Date 1460497436 0
> #      Tue Apr 12 21:43:56 2016 +0000
> # Node ID 0fd09ef3b42024889053a97b4610a5f6b009bf34
> # Parent  5af048d312cd081f88706aa7b19af85083c197ec
> import-checker: parse python code from .t files
> 
> diff --git a/contrib/import-checker.py b/contrib/import-checker.py
> --- a/contrib/import-checker.py
> +++ b/contrib/import-checker.py
> @@ -5,6 +5,7 @@
>  import ast
>  import collections
>  import os
> +import re
>  import sys
>  
>  # Import a minimal set of stdlib modules needed for list_stdlib_modules()
> @@ -568,10 +569,97 @@
>  def _cycle_sortkey(c):
>      return len(c), c
>  
> +def embedded(f, modname, src):
> +    """ Extract embedded python code
> +
> +    >>> def test(fn, lines):
> +    ...     for s, m, f, l in embedded(fn, "example", lines):
> +    ...         print("%s %s %s" % (m, f, l))
> +    ...         print(repr(s))
> +    >>> lines = [
> +    ...   'comment',
> +    ...   '  >>> from __future__ import print_function',
> +    ...   "  >>> ' multiline",
> +    ...   "  ... string'",
> +    ...   '  ',
> +    ...   'comment',
> +    ...   '  $ cat > foo.py <<EOF',
> +    ...   '  > from __future__ import print_function',
> +    ...   '  > EOF',
> +    ... ]
> +    >>> test("example.t", lines)
> +    example[2] doctest.py 2
> +    "from __future__ import print_function\\n' multiline\\nstring'\\n"
> +    example[7] foo.py 7
> +    'from __future__ import print_function\\n'
> +    """
> +    inlinepython = 0
> +    shpython = 0
> +    script = []
> +    prefix = 6
> +    t = ''
> +    n = 0
> +    for l in src:
> +        n += 1
> +        if not l.endswith(b'\n'):
> +            l += b'\n'

Mixing bytes and str would be a problem on Python 3, but it seems okay for now.

> +        if l.startswith(b'  >>> '): # python inlines
> +            if shpython:
> +                print("Parse Error %s:%d\n" % (f, n))

Adjusted to "%s:%d: Parse Error" and dropped the extra "\n".


> +            if not inlinepython:
> +                # We've just entered a Python block.
> +                inlinepython = n
> +                t = 'doctest.py'
> +            script.append(l[prefix:])
> +            continue
> +        if l.startswith(b'  ... '): # python inlines
> +            script.append(l[prefix:])
> +            continue
> +        cat = re.search("\$ \s*cat\s*>\s*(\S+.py)\s*<<\s*EOF", l)

Should be r"\$ \s*cat\s*>\s*(\S+\.py)\s*<<\s*EOF". Fixed in flight.
          ^                     ^

> +    py = False
>      if f.endswith('.py'):
>          with open(f) as src:
> -            yield src.read(), modname
> +            yield src.read(), modname, f, 0
> +            py = True
> +    if py or f.endswith('.t'):
> +        with open(f) as src:
> +            for script, modname, t, line in embedded(f, modname, src):
> +                yield script, modname, t, line

I don't understand this "py" flag, but it won't be a big deal.

Patch

diff --git a/contrib/import-checker.py b/contrib/import-checker.py
--- a/contrib/import-checker.py
+++ b/contrib/import-checker.py
@@ -5,6 +5,7 @@ 
 import ast
 import collections
 import os
+import re
 import sys
 
 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
@@ -568,10 +569,97 @@ 
 def _cycle_sortkey(c):
     return len(c), c
 
+def embedded(f, modname, src):
+    """ Extract embedded python code
+
+    >>> def test(fn, lines):
+    ...     for s, m, f, l in embedded(fn, "example", lines):
+    ...         print("%s %s %s" % (m, f, l))
+    ...         print(repr(s))
+    >>> lines = [
+    ...   'comment',
+    ...   '  >>> from __future__ import print_function',
+    ...   "  >>> ' multiline",
+    ...   "  ... string'",
+    ...   '  ',
+    ...   'comment',
+    ...   '  $ cat > foo.py <<EOF',
+    ...   '  > from __future__ import print_function',
+    ...   '  > EOF',
+    ... ]
+    >>> test("example.t", lines)
+    example[2] doctest.py 2
+    "from __future__ import print_function\\n' multiline\\nstring'\\n"
+    example[7] foo.py 7
+    'from __future__ import print_function\\n'
+    """
+    inlinepython = 0
+    shpython = 0
+    script = []
+    prefix = 6
+    t = ''
+    n = 0
+    for l in src:
+        n += 1
+        if not l.endswith(b'\n'):
+            l += b'\n'
+        if l.startswith(b'  >>> '): # python inlines
+            if shpython:
+                print("Parse Error %s:%d\n" % (f, n))
+            if not inlinepython:
+                # We've just entered a Python block.
+                inlinepython = n
+                t = 'doctest.py'
+            script.append(l[prefix:])
+            continue
+        if l.startswith(b'  ... '): # python inlines
+            script.append(l[prefix:])
+            continue
+        cat = re.search("\$ \s*cat\s*>\s*(\S+.py)\s*<<\s*EOF", l)
+        if cat:
+            if inlinepython:
+                yield ''.join(script), ("%s[%d]" %
+                       (modname, inlinepython)), t, inlinepython
+                script = []
+                inlinepython = 0
+            shpython = n
+            t = cat.group(1)
+            continue
+        if shpython and l.startswith(b'  > '): # sh continuation
+            if l == b'  > EOF\n':
+                yield ''.join(script), ("%s[%d]" %
+                       (modname, shpython)), t, shpython
+                script = []
+                shpython = 0
+            else:
+                script.append(l[4:])
+            continue
+        if inlinepython and l == b'  \n':
+            yield ''.join(script), ("%s[%d]" %
+                   (modname, inlinepython)), t, inlinepython
+            script = []
+            inlinepython = 0
+            continue
+
 def sources(f, modname):
+    """ yields possibly multiple sources from a filepath
+
+    input: filepath, modulename
+    yields:  script(string), modulename, filepath, linenumber
+
+    For embedded scripts, the modulename and filepath will be different
+    from the function arguments. linenumber is an offset relative to
+    the input file.
+    """
+    py = False
     if f.endswith('.py'):
         with open(f) as src:
-            yield src.read(), modname
+            yield src.read(), modname, f, 0
+            py = True
+    if py or f.endswith('.t'):
+        with open(f) as src:
+            for script, modname, t, line in embedded(f, modname, src):
+                yield script, modname, t, line
 
 def main(argv):
     if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
@@ -587,18 +675,18 @@ 
         modname = dotted_name_of_path(source_path, trimpure=True)
         localmods[modname] = source_path
     for localmodname, source_path in sorted(localmods.items()):
-        for src, modname in sources(source_path, localmodname):
+        for src, modname, name, line in sources(source_path, localmodname):
             try:
                 used_imports[modname] = sorted(
-                    imported_modules(src, modname, source_path, localmods,
+                    imported_modules(src, modname, name, localmods,
                                      ignore_nested=True))
                 for error, lineno in verify_import_convention(modname, src,
                                                               localmods):
                     any_errors = True
-                    print('%s:%d: %s' % (source_path, lineno, error))
+                    print('%s:%d: %s' % (source_path, lineno + line, error))
             except SyntaxError as e:
                 print('SyntaxError %s:%d: %s' %
-                      (source_path, e.lineno, e))
+                      (source_path, e.lineno + line, e))
     cycles = find_cycles(used_imports)
     if cycles:
         firstmods = set()