import-checker: use testparseutil.embedded() to centralize detection logic
author FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
Thu, 23 Aug 2018 12:25:54 +0900
changeset 40095 7288838bec1f
parent 40094 ff47ba7a2903
child 40096 12a72729678e
import-checker: use testparseutil.embedded() to centralize detection logic

This patch also fixes the following issues of embedded() in import-checker.py:

- it overlooks (or mis-detects) the end of an inline script in doctest style
- it overlooks an inline script in doctest style at the end of the file
  (and also ignores an invalid un-closed heredoc at the end of the file)
- it overlooks code fragments in the styles below
  - "python <<EOF" (heredoc should be "cat > file <<EOF" style)
  - "cat > foobar.py << ANYLIMIT" (limit mark should be "EOF")
  - "cat << EOF > foobar.py" (filename should be placed before limit mark)
  - "cat >> foobar.py << EOF" (appending is ignored)
contrib/import-checker.py
--- a/contrib/import-checker.py	Thu Aug 23 12:25:54 2018 +0900
+++ b/contrib/import-checker.py	Thu Aug 23 12:25:54 2018 +0900
@@ -5,7 +5,6 @@
 import ast
 import collections
 import os
-import re
 import sys
 
 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
@@ -18,6 +17,8 @@
         basehttpserver = None
     import zlib
 
+import testparseutil
+
 # Whitelist of modules that symbols can be directly imported from.
 allowsymbolimports = (
     '__future__',
@@ -659,61 +660,21 @@
     ...   b'  > EOF',
     ... ]
     >>> test(b"example.t", lines)
-    example[2] doctest.py 2
-    "from __future__ import print_function\\n' multiline\\nstring'\\n"
-    example[7] foo.py 7
+    example[2] doctest.py 1
+    "from __future__ import print_function\\n' multiline\\nstring'\\n\\n"
+    example[8] foo.py 7
     'from __future__ import print_function\\n'
     """
-    inlinepython = 0
-    shpython = 0
-    script = []
-    prefix = 6
-    t = ''
-    n = 0
-    for l in src:
-        n += 1
-        if not l.endswith(b'\n'):
-            l += b'\n'
-        if l.startswith(b'  >>> '): # python inlines
-            if shpython:
-                print("%s:%d: Parse Error" % (f, n))
-            if not inlinepython:
-                # We've just entered a Python block.
-                inlinepython = n
-                t = b'doctest.py'
-            script.append(l[prefix:])
-            continue
-        if l.startswith(b'  ... '): # python inlines
-            script.append(l[prefix:])
-            continue
-        cat = re.search(br"\$ \s*cat\s*>\s*(\S+\.py)\s*<<\s*EOF", l)
-        if cat:
-            if inlinepython:
-                yield b''.join(script), (b"%s[%d]" %
-                       (modname, inlinepython)), t, inlinepython
-                script = []
-                inlinepython = 0
-            shpython = n
-            t = cat.group(1)
-            continue
-        if shpython and l.startswith(b'  > '): # sh continuation
-            if l == b'  > EOF\n':
-                yield b''.join(script), (b"%s[%d]" %
-                       (modname, shpython)), t, shpython
-                script = []
-                shpython = 0
-            else:
-                script.append(l[4:])
-            continue
-        # If we have an empty line or a command for sh, we end the
-        # inline script.
-        if inlinepython and (l == b'  \n'
-                             or l.startswith(b'  $ ')):
-            yield b''.join(script), (b"%s[%d]" %
-                   (modname, inlinepython)), t, inlinepython
-            script = []
-            inlinepython = 0
-            continue
+    errors = []
+    for name, starts, ends, code in testparseutil.pyembedded(f, src, errors):
+        if not name:
+            # use 'doctest.py', in order to make already existing
+            # doctest above pass instantly
+            name = 'doctest.py'
+        # "starts" is "line number" (1-origin), but embedded() is
+        # expected to return "line offset" (0-origin). Therefore, this
+        # yields "starts - 1".
+        yield code, "%s[%d]" % (modname, starts), name, starts - 1
 
 def sources(f, modname):
     """Yields possibly multiple sources from a filepath