Patchwork [2,of,7] contrib: add an import checker

login
register
mail settings
Submitter Augie Fackler
Date Nov. 17, 2013, 5:21 p.m.
Message ID <8b6007b0ecd735b917d3.1384708884@hit-nxdomain.opendns.com>
Download mbox | patch
Permalink /patch/3018/
State Superseded
Commit e5d51fa51abacc03764b60ed76e330cfed6901cf
Headers show

Comments

Augie Fackler - Nov. 17, 2013, 5:21 p.m.
# HG changeset patch
# User Augie Fackler <raf@durin42.com>
# Date 1384708016 18000
#      Sun Nov 17 12:06:56 2013 -0500
# Node ID 8b6007b0ecd735b917d39bdeab1b2ec028fd3992
# Parent  a9df4d553ae5067270540e97960b4df610e57904
contrib: add an import checker

This checks for cycles in the module graph and verifies that imports
of stdlib modules are not on the same line as relative imports of
mercurial modules.

Patch

diff --git a/contrib/import-checker.py b/contrib/import-checker.py
new file mode 100644
--- /dev/null
+++ b/contrib/import-checker.py
@@ -0,0 +1,194 @@ 
+import ast
+import os
+import sys
+
+def dotted_name_of_path(path):
+    """Given a relative path to a source file, return its dotted module name.
+
+
+    >>> dotted_name_of_path('mercurial/error.py')
+    'mercurial.error'
+    """
+    parts = path.split('/')
+    parts[-1] = parts[-1][:-3] # remove .py
+    return '.'.join(parts)
+
+
+def list_stdlib_modules():
+    """List the modules present in the stdlib.
+
+    >>> mods = set(list_stdlib_modules())
+    >>> 'BaseHTTPServer' in mods
+    True
+
+    os.path isn't really a module, so it's missing:
+
+    >>> 'os.path' in mods
+    False
+
+    sys requires special treatment, because it's baked into the
+    interpreter, but it should still appear:
+
+    >>> 'sys' in mods
+    True
+
+    >>> 'collections' in mods
+    True
+
+    >>> 'cStringIO' in mods
+    True
+    """
+    for m in sys.builtin_module_names:
+        yield m
+    # These modules only exist on windows, but we should always
+    # consider them stdlib.
+    for m in ['msvcrt', '_winreg']:
+        yield m
+    # These get missed too
+    for m in 'ctypes', 'email':
+        yield m
+    yield 'builtins' # python3 only
+    for libpath in sys.path:
+        # We want to walk everything in sys.path that starts with
+        # either sys.prefix or sys.exec_prefix.
+        if not (libpath.startswith(sys.prefix)
+                or libpath.startswith(sys.exec_prefix)):
+            continue
+        if 'site-packages' in libpath:
+            continue
+        for top, dirs, files in os.walk(libpath):
+            for name in files:
+                if name == '__init__.py':
+                    continue
+                if not (name.endswith('.py') or name.endswith('.so')):
+                    continue
+                full_path = os.path.join(top, name)
+                if 'site-packages' in full_path:
+                    continue
+                rel_path = full_path[len(libpath) + 1:]
+                mod = dotted_name_of_path(rel_path)
+                yield mod
+
+stdlib_modules = set(list_stdlib_modules())
+
+def imported_modules(source):
+    """Given the source of a file as a string, yield the names
+    imported by that file.
+
+    >>> list(imported_modules(
+    ...         'import foo ; from baz import bar; import foo.qux'))
+    ['foo', 'baz.bar', 'foo.qux']
+    """
+    for node in ast.walk(ast.parse(source)):
+        if isinstance(node, ast.Import):
+            for n in node.names:
+                yield n.name
+        elif isinstance(node, ast.ImportFrom):
+            prefix = node.module + '.'
+            for n in node.names:
+                yield prefix + n.name
+
+def verify_stdlib_on_own_line(source):
+    """Given some python source, verify that stdlib imports are done
+    in separate statements from relative local module imports.
+
+    Observing this limitation is important as it works around an
+    annoying lib2to3 bug in relative import rewrites:
+    http://bugs.python.org/issue19510.
+
+    >>> list(verify_stdlib_on_own_line('import sys, foo'))
+    ['mixed stdlib and relative imports:\\n   foo, sys']
+    >>> list(verify_stdlib_on_own_line('import sys, os'))
+    []
+    >>> list(verify_stdlib_on_own_line('import foo, bar'))
+    []
+    """
+    for node in ast.walk(ast.parse(source)):
+        if isinstance(node, ast.Import):
+            from_stdlib = {}
+            for n in node.names:
+                from_stdlib[n.name] = n.name in stdlib_modules
+            num_std = len([x for x in from_stdlib.values() if x])
+            if num_std not in (len(from_stdlib.values()), 0):
+                yield ('mixed stdlib and relative imports:\n   %s' %
+                       ', '.join(sorted(from_stdlib.iterkeys())))
+
+class CircularImport(Exception):
+    pass
+
+
+def cyclekey(names):
+    return tuple(sorted(set(names)))
+
+def check_one_mod(mod, imports, path=None, ignore=None):
+    if path is None:
+        path = []
+    if ignore is None:
+        ignore = []
+    path = path + [mod]
+    for i in sorted(imports.get(mod, [])):
+        if i not in stdlib_modules:
+            i = mod.rsplit('.', 1)[0] + '.' + i
+        if i in path:
+            firstspot = path.index(i)
+            cycle = path[firstspot:] + [i]
+            if cyclekey(cycle) not in ignore:
+                raise CircularImport(cycle)
+            continue
+        check_one_mod(i, imports, path=path, ignore=ignore)
+
+
+def find_cycles(imports):
+    """Find cycles in an already-loaded import graph.
+
+    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
+    ...            'top.bar': ['baz', 'sys'],
+    ...            'top.baz': ['foo'],
+    ...            'top.qux': ['foo']}
+    >>> print '\\n'.join(sorted(find_cycles(imports)))
+    top.bar -> top.baz -> top.foo -> top.bar
+    top.foo -> top.qux -> top.foo
+    """
+    cycles = {}
+    seen_in_cycle = set()
+    for mod in sorted(imports.iterkeys()):
+        try:
+            check_one_mod(mod, imports, ignore=cycles)
+        except CircularImport, e:
+            cycle = e.args[0]
+            cycles[cyclekey(cycle)] = ' -> '.join(cycle)
+            seen_in_cycle |= set(cycle)
+    return cycles.values()
+
+def main(argv):
+    if len(argv) < 2:
+        print 'Usage: %s file [file] [file] ...'
+        return 1
+    used_imports = {}
+    any_errors = False
+    for source_path in argv[1:]:
+        f = open(source_path)
+        modname = dotted_name_of_path(source_path)
+        src = f.read()
+        used_imports[modname] = sorted(imported_modules(src))
+        for error in verify_stdlib_on_own_line(src):
+            any_errors = True
+            print source_path, error
+        f.close()
+    cycles = find_cycles(used_imports)
+    if cycles:
+        firstmods = set()
+        for c in sorted(cycles, key=len):
+            first = c.split()[0]
+            # As a rough cut, ignore any cycle that starts with the
+            # same module as some other cycle. Otherwise we see lots
+            # of cycles that are effectively duplicates.
+            if first in firstmods:
+                continue
+            print 'Import cycle:', c
+            firstmods.add(first)
+        any_errors = True
+    return not any_errors
+
+if __name__ == '__main__':
+    sys.exit(int(main(sys.argv)))