|
1 import ast |
|
2 import os |
|
3 import sys |
|
4 |
|
def dotted_name_of_path(path):
    """Given a relative path to a source file, return its dotted module name.

    Directory separators become dots.  Everything from the first dot of
    the file name onwards is dropped, which removes ``.py``, ``.so`` and
    ABI-tagged suffixes such as ``.x86_64-linux-gnu.so``.  A file name
    without an extension is kept whole.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('zlib.x86_64-linux-gnu.so')
    'zlib'
    >>> dotted_name_of_path('setup')
    'setup'
    """
    # Paths built with os.path.join() use the native separator; fold it to
    # '/' so that such paths are split correctly on every platform.
    parts = path.replace(os.sep, '/').split('/')
    # Cut at the first dot instead of blindly dropping three characters.
    parts[-1] = parts[-1].split('.', 1)[0]
    return '.'.join(parts)
|
15 |
|
16 |
|
def list_stdlib_modules():
    """List the modules present in the stdlib.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True

    A directory containing an ``__init__.py`` is reported as a package
    under the dotted name of that directory.  The same name may be
    yielded more than once; callers are expected to collect the result
    into a set.
    """
    for m in sys.builtin_module_names:
        yield m
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    for m in ['msvcrt', '_winreg']:
        yield m
    # Listed explicitly so they are reported even if the walk below does
    # not come across them.
    for m in 'ctypes', 'email':
        yield m
    yield 'builtins'  # python3 only
    for libpath in sys.path:
        # We want to walk everything in sys.path that starts with
        # either sys.prefix or sys.exec_prefix.
        if not libpath.startswith((sys.prefix, sys.exec_prefix)):
            continue
        if 'site-packages' in libpath:
            continue
        for top, dirs, files in os.walk(libpath):
            for name in files:
                if name == '__init__.py':
                    # A package: report the directory itself rather than
                    # dropping it, otherwise e.g. 'json' is never listed.
                    if 'site-packages' in top:
                        continue
                    rel_dir = os.path.relpath(top, libpath)
                    if rel_dir != os.curdir:
                        yield rel_dir.replace(os.sep, '.')
                    continue
                if not name.endswith(('.py', '.so')):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' in full_path:
                    continue
                # relpath() copes with a sys.path entry that ends in a
                # separator, where slicing by len(libpath) + 1 would cut
                # off the first character of the name.
                rel_path = os.path.relpath(full_path, libpath)
                # dotted_name_of_path() expects '/'-separated paths.
                yield dotted_name_of_path(rel_path.replace(os.sep, '/'))
|
71 |
|
# Set of stdlib module names, computed once when this file is loaded.  The
# checks below use it to tell stdlib imports from project-local ones.
stdlib_modules = set(list_stdlib_modules())
|
73 |
|
def imported_modules(source):
    """Given the source of a file as a string, yield the names
    imported by that file.

    ``import a.b`` yields ``'a.b'``; ``from a import b`` yields ``'a.b'``.
    A bare relative import (``from . import b``) has no module part and
    yields just ``'b'``.  Names are produced in ``ast.walk`` order.

    >>> list(imported_modules(
    ...     'import foo ; from baz import bar; import foo.qux'))
    ['foo', 'baz.bar', 'foo.qux']
    >>> list(imported_modules('from . import util'))
    ['util']
    """
    for node in ast.walk(ast.parse(source)):
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for ``from . import x``; concatenating
            # it with '.' would raise TypeError.
            prefix = node.module + '.' if node.module else ''
            for n in node.names:
                yield prefix + n.name
|
90 |
|
def verify_stdlib_on_own_line(source):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    Only plain ``import a, b`` statements are inspected.  One message is
    yielded per statement that names both a module found in
    ``stdlib_modules`` and one that is not.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed stdlib and relative imports:\\n foo, sys']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    tree = ast.parse(source)
    for stmt in ast.walk(tree):
        if not isinstance(stmt, ast.Import):
            continue
        # Map each imported name to whether it is a stdlib module.
        is_std = dict((alias.name, alias.name in stdlib_modules)
                      for alias in stmt.names)
        flags = list(is_std.values())
        # Mixed means at least one stdlib name and at least one other.
        if any(flags) and not all(flags):
            listing = ', '.join(sorted(is_std))
            yield 'mixed stdlib and relative imports:\n %s' % listing
|
115 |
|
class CircularImport(Exception):
    """Raised by check_one_mod when it runs into an import cycle.

    The cycle, a list of dotted module names that starts and ends with
    the same module, is passed as the only argument.
    """
|
118 |
|
119 |
|
def cyclekey(names):
    """Return an order-independent key for the modules in ``names``.

    Duplicates are dropped and the remaining names sorted, so two cycles
    that visit the same set of modules get the same tuple.
    """
    unique_names = set(names)
    return tuple(sorted(unique_names))
|
122 |
|
def check_one_mod(mod, imports, path=None, ignore=None):
    """Follow the imports of ``mod`` depth-first and raise on a new cycle.

    ``imports`` maps dotted module names to the names they import.  An
    imported name that is not in ``stdlib_modules`` is treated as local
    and qualified with ``mod.rsplit('.', 1)[0]``.  ``path`` is the chain
    of modules followed so far, and ``ignore`` is a container of cycle
    keys (as built by cyclekey) that must not be reported again.

    Raises CircularImport, carrying the cycle as a list, for the first
    cycle found whose key is not in ``ignore``.
    """
    trail = ([] if path is None else path) + [mod]
    known = [] if ignore is None else ignore
    package = mod.rsplit('.', 1)[0]
    for name in sorted(imports.get(mod, [])):
        target = name if name in stdlib_modules else package + '.' + name
        if target not in trail:
            check_one_mod(target, imports, path=trail, ignore=known)
            continue
        # Already on the current chain: cut out the loop and close it.
        cycle = trail[trail.index(target):] + [target]
        if cyclekey(cycle) not in known:
            raise CircularImport(cycle)
|
139 |
|
140 |
|
def _rotate_cycle(cycle):
    """Return the closed ``cycle`` rotated so its smallest name comes first.

    ``cycle`` starts and ends with the same module; so does the result.
    """
    ring = cycle[:-1]  # drop the duplicated closing entry
    start = ring.index(min(ring))
    return ring[start:] + ring[:start] + [ring[start]]


def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    ``imports`` maps dotted module names to the names they import.  The
    result is a list of strings of the form ``'a -> b -> a'``, one per
    distinct set of modules that forms a cycle.  Each traversal stops at
    the first cycle it has not seen yet, so a starting module contributes
    at most one new cycle.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport as e:
            cycle = e.args[0]
            # The name ``rotatecycle`` used here before is not defined in
            # this file; rotating to a canonical start is the presumed
            # intent.
            cycles[cyclekey(cycle)] = ' -> '.join(_rotate_cycle(cycle))
    return list(cycles.values())
|
160 |
|
def _cycle_sortkey(c):
    """Sort key ordering cycle strings by length, then by the string itself."""
    length = len(c)
    return (length, c)
|
163 |
|
def main(argv):
    """Check the given source files for import problems.

    ``argv`` is a ``sys.argv``-style list: the program name followed by
    the paths of the files to check.  Each file is checked for import
    statements that mix stdlib and local modules, and its imports are
    recorded; the combined import graph is then searched for cycles.
    All findings are printed to stdout.

    Returns 1 if no file was given.  Otherwise returns True if any
    problem was reported and False if not, so that ``int()`` of the
    result is a conventional exit status (0 means clean).
    """
    if len(argv) < 2:
        prog = argv[0] if argv else sys.argv[0]
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        # The with block closes the file even if reading fails.
        with open(source_path) as f:
            src = f.read()
        modname = dotted_name_of_path(source_path)
        used_imports[modname] = sorted(imported_modules(src))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
            any_errors = True
    # Previously ``not any_errors``, which made a clean run exit with 1
    # and a failing run exit with 0.
    return any_errors
|
193 |
|
# When run as a script, the result of main() is converted to an int and
# used as the process exit status.
if __name__ == '__main__':
    sys.exit(int(main(sys.argv)))