contrib/import-checker.py
changeset 20036 e5d51fa51aba
child 20037 957b43371928
equal deleted inserted replaced
20035:cd79d9ab5e42 20036:e5d51fa51aba
       
     1 import ast
       
     2 import os
       
     3 import sys
       
     4 
       
     5 def dotted_name_of_path(path):
       
     6     """Given a relative path to a source file, return its dotted module name.
       
     7 
       
     8 
       
     9     >>> dotted_name_of_path('mercurial/error.py')
       
    10     'mercurial.error'
       
    11     """
       
    12     parts = path.split('/')
       
    13     parts[-1] = parts[-1][:-3] # remove .py
       
    14     return '.'.join(parts)
       
    15 
       
    16 
       
    17 def list_stdlib_modules():
       
    18     """List the modules present in the stdlib.
       
    19 
       
    20     >>> mods = set(list_stdlib_modules())
       
    21     >>> 'BaseHTTPServer' in mods
       
    22     True
       
    23 
       
    24     os.path isn't really a module, so it's missing:
       
    25 
       
    26     >>> 'os.path' in mods
       
    27     False
       
    28 
       
    29     sys requires special treatment, because it's baked into the
       
    30     interpreter, but it should still appear:
       
    31 
       
    32     >>> 'sys' in mods
       
    33     True
       
    34 
       
    35     >>> 'collections' in mods
       
    36     True
       
    37 
       
    38     >>> 'cStringIO' in mods
       
    39     True
       
    40     """
       
    41     for m in sys.builtin_module_names:
       
    42         yield m
       
    43     # These modules only exist on windows, but we should always
       
    44     # consider them stdlib.
       
    45     for m in ['msvcrt', '_winreg']:
       
    46         yield m
       
    47     # These get missed too
       
    48     for m in 'ctypes', 'email':
       
    49         yield m
       
    50     yield 'builtins' # python3 only
       
    51     for libpath in sys.path:
       
    52         # We want to walk everything in sys.path that starts with
       
    53         # either sys.prefix or sys.exec_prefix.
       
    54         if not (libpath.startswith(sys.prefix)
       
    55                 or libpath.startswith(sys.exec_prefix)):
       
    56             continue
       
    57         if 'site-packages' in libpath:
       
    58             continue
       
    59         for top, dirs, files in os.walk(libpath):
       
    60             for name in files:
       
    61                 if name == '__init__.py':
       
    62                     continue
       
    63                 if not (name.endswith('.py') or name.endswith('.so')):
       
    64                     continue
       
    65                 full_path = os.path.join(top, name)
       
    66                 if 'site-packages' in full_path:
       
    67                     continue
       
    68                 rel_path = full_path[len(libpath) + 1:]
       
    69                 mod = dotted_name_of_path(rel_path)
       
    70                 yield mod
       
    71 
       
# Built once when the module is loaded; the stdlib membership tests in
# verify_stdlib_on_own_line() and check_one_mod() are lookups in this set.
stdlib_modules = set(list_stdlib_modules())
       
    73 
       
    74 def imported_modules(source):
       
    75     """Given the source of a file as a string, yield the names
       
    76     imported by that file.
       
    77 
       
    78     >>> list(imported_modules(
       
    79     ...         'import foo ; from baz import bar; import foo.qux'))
       
    80     ['foo', 'baz.bar', 'foo.qux']
       
    81     """
       
    82     for node in ast.walk(ast.parse(source)):
       
    83         if isinstance(node, ast.Import):
       
    84             for n in node.names:
       
    85                 yield n.name
       
    86         elif isinstance(node, ast.ImportFrom):
       
    87             prefix = node.module + '.'
       
    88             for n in node.names:
       
    89                 yield prefix + n.name
       
    90 
       
    91 def verify_stdlib_on_own_line(source):
       
    92     """Given some python source, verify that stdlib imports are done
       
    93     in separate statements from relative local module imports.
       
    94 
       
    95     Observing this limitation is important as it works around an
       
    96     annoying lib2to3 bug in relative import rewrites:
       
    97     http://bugs.python.org/issue19510.
       
    98 
       
    99     >>> list(verify_stdlib_on_own_line('import sys, foo'))
       
   100     ['mixed stdlib and relative imports:\\n   foo, sys']
       
   101     >>> list(verify_stdlib_on_own_line('import sys, os'))
       
   102     []
       
   103     >>> list(verify_stdlib_on_own_line('import foo, bar'))
       
   104     []
       
   105     """
       
   106     for node in ast.walk(ast.parse(source)):
       
   107         if isinstance(node, ast.Import):
       
   108             from_stdlib = {}
       
   109             for n in node.names:
       
   110                 from_stdlib[n.name] = n.name in stdlib_modules
       
   111             num_std = len([x for x in from_stdlib.values() if x])
       
   112             if num_std not in (len(from_stdlib.values()), 0):
       
   113                 yield ('mixed stdlib and relative imports:\n   %s' %
       
   114                        ', '.join(sorted(from_stdlib.iterkeys())))
       
   115 
       
   116 class CircularImport(Exception):
       
   117     pass
       
   118 
       
   119 
       
   120 def cyclekey(names):
       
   121     return tuple(sorted(set(names)))
       
   122 
       
   123 def check_one_mod(mod, imports, path=None, ignore=None):
       
   124     if path is None:
       
   125         path = []
       
   126     if ignore is None:
       
   127         ignore = []
       
   128     path = path + [mod]
       
   129     for i in sorted(imports.get(mod, [])):
       
   130         if i not in stdlib_modules:
       
   131             i = mod.rsplit('.', 1)[0] + '.' + i
       
   132         if i in path:
       
   133             firstspot = path.index(i)
       
   134             cycle = path[firstspot:] + [i]
       
   135             if cyclekey(cycle) not in ignore:
       
   136                 raise CircularImport(cycle)
       
   137             continue
       
   138         check_one_mod(i, imports, path=path, ignore=ignore)
       
   139 
       
   140 
       
   141 def find_cycles(imports):
       
   142     """Find cycles in an already-loaded import graph.
       
   143 
       
   144     >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
       
   145     ...            'top.bar': ['baz', 'sys'],
       
   146     ...            'top.baz': ['foo'],
       
   147     ...            'top.qux': ['foo']}
       
   148     >>> print '\\n'.join(sorted(find_cycles(imports)))
       
   149     top.bar -> top.baz -> top.foo -> top.bar
       
   150     top.foo -> top.qux -> top.foo
       
   151     """
       
   152     cycles = {}
       
   153     for mod in sorted(imports.iterkeys()):
       
   154         try:
       
   155             check_one_mod(mod, imports, ignore=cycles)
       
   156         except CircularImport, e:
       
   157             cycle = e.args[0]
       
   158             cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
       
   159     return cycles.values()
       
   160 
       
   161 def _cycle_sortkey(c):
       
   162     return len(c), c
       
   163 
       
   164 def main(argv):
       
   165     if len(argv) < 2:
       
   166         print 'Usage: %s file [file] [file] ...'
       
   167         return 1
       
   168     used_imports = {}
       
   169     any_errors = False
       
   170     for source_path in argv[1:]:
       
   171         f = open(source_path)
       
   172         modname = dotted_name_of_path(source_path)
       
   173         src = f.read()
       
   174         used_imports[modname] = sorted(imported_modules(src))
       
   175         for error in verify_stdlib_on_own_line(src):
       
   176             any_errors = True
       
   177             print source_path, error
       
   178         f.close()
       
   179     cycles = find_cycles(used_imports)
       
   180     if cycles:
       
   181         firstmods = set()
       
   182         for c in sorted(cycles, key=_cycle_sortkey):
       
   183             first = c.split()[0]
       
   184             # As a rough cut, ignore any cycle that starts with the
       
   185             # same module as some other cycle. Otherwise we see lots
       
   186             # of cycles that are effectively duplicates.
       
   187             if first in firstmods:
       
   188                 continue
       
   189             print 'Import cycle:', c
       
   190             firstmods.add(first)
       
   191         any_errors = True
       
   192     return not any_errors
       
   193 
       
   194 if __name__ == '__main__':
       
   195     sys.exit(int(main(sys.argv)))