15 import os |
15 import os |
16 import sys |
16 import sys |
17 import tempfile |
17 import tempfile |
18 import token |
18 import token |
19 import tokenize |
19 import tokenize |
|
20 |
|
def adjusttokenpos(t, ofs):
    """Return token *t* with its start/end columns shifted by *ofs*.

    Only the column component of each position moves; the row numbers
    are preserved unchanged.
    """
    (srow, scol) = t.start
    (erow, ecol) = t.end
    return t._replace(start=(srow, scol + ofs), end=(erow, ecol + ofs))
20 |
25 |
21 if True: |
26 if True: |
22 def replacetokens(tokens, opts): |
27 def replacetokens(tokens, opts): |
23 """Transform a stream of tokens from raw to Python 3. |
28 """Transform a stream of tokens from raw to Python 3. |
24 |
29 |
76 """ |
81 """ |
77 st = tokens[j] |
82 st = tokens[j] |
78 if st.type == token.STRING and st.string.startswith(("'", '"')): |
83 if st.type == token.STRING and st.string.startswith(("'", '"')): |
79 sysstrtokens.add(st) |
84 sysstrtokens.add(st) |
80 |
85 |
|
86 coldelta = 0 # column increment for new opening parens |
|
87 coloffset = -1 # column offset for the current line (-1: TBD) |
|
88 parens = [(0, 0, 0)] # stack of (line, end-column, column-offset) |
81 for i, t in enumerate(tokens): |
89 for i, t in enumerate(tokens): |
|
90 # Compute the column offset for the current line, such that |
|
91 # the current line will be aligned to the last opening paren |
|
92 # as before. |
|
93 if coloffset < 0: |
|
94 if t.start[1] == parens[-1][1]: |
|
95 coloffset = parens[-1][2] |
|
96 elif t.start[1] + 1 == parens[-1][1]: |
|
97 # fix misaligned indent of s/util.Abort/error.Abort/ |
|
98 coloffset = parens[-1][2] + (parens[-1][1] - t.start[1]) |
|
99 else: |
|
100 coloffset = 0 |
|
101 |
|
102 # Reset per-line attributes at EOL. |
|
103 if t.type in (token.NEWLINE, tokenize.NL): |
|
104 yield adjusttokenpos(t, coloffset) |
|
105 coldelta = 0 |
|
106 coloffset = -1 |
|
107 continue |
|
108 |
|
109 # Remember the last paren position. |
|
110 if _isop(i, '(', '[', '{'): |
|
111 parens.append(t.end + (coloffset + coldelta,)) |
|
112 elif _isop(i, ')', ']', '}'): |
|
113 parens.pop() |
|
114 |
82 # Convert most string literals to byte literals. String literals |
115 # Convert most string literals to byte literals. String literals |
83 # in Python 2 are bytes. String literals in Python 3 are unicode. |
116 # in Python 2 are bytes. String literals in Python 3 are unicode. |
84 # Most strings in Mercurial are bytes and unicode strings are rare. |
117 # Most strings in Mercurial are bytes and unicode strings are rare. |
85 # Rather than rewrite all string literals to use ``b''`` to indicate |
118 # Rather than rewrite all string literals to use ``b''`` to indicate |
86 # byte strings, we apply this token transformer to insert the ``b`` |
119 # byte strings, we apply this token transformer to insert the ``b`` |
95 # is b''' prefixed, leading to a SyntaxError. We leave all |
128 # is b''' prefixed, leading to a SyntaxError. We leave all |
96 # docstrings as unprefixed to avoid this. This means Mercurial |
129 # docstrings as unprefixed to avoid this. This means Mercurial |
97 # components touching docstrings need to handle unicode, |
130 # components touching docstrings need to handle unicode, |
98 # unfortunately. |
131 # unfortunately. |
99 if s[0:3] in ("'''", '"""'): |
132 if s[0:3] in ("'''", '"""'): |
100 yield t |
133 yield adjusttokenpos(t, coloffset) |
101 continue |
134 continue |
102 |
135 |
103 # If the first character isn't a quote, it is likely a string |
136 # If the first character isn't a quote, it is likely a string |
104 # prefixing character (such as 'b', 'u', or 'r'. Ignore. |
137 # prefixing character (such as 'b', 'u', or 'r'. Ignore. |
105 if s[0] not in ("'", '"'): |
138 if s[0] not in ("'", '"'): |
106 yield t |
139 yield adjusttokenpos(t, coloffset) |
107 continue |
140 continue |
108 |
141 |
109 # String literal. Prefix to make a b'' string. |
142 # String literal. Prefix to make a b'' string. |
110 yield t._replace(string='b%s' % t.string) |
143 yield adjusttokenpos(t._replace(string='b%s' % t.string), |
|
144 coloffset) |
|
145 coldelta += 1 |
111 continue |
146 continue |
112 |
147 |
113 # This looks like a function call. |
148 # This looks like a function call. |
114 if t.type == token.NAME and _isop(i + 1, '('): |
149 if t.type == token.NAME and _isop(i + 1, '('): |
115 fn = t.string |
150 fn = t.string |
130 _ensuresysstr(argidx) |
165 _ensuresysstr(argidx) |
131 |
166 |
132 # It changes iteritems/values to items/values as they are not |
167 # It changes iteritems/values to items/values as they are not |
133 # present in Python 3 world. |
168 # present in Python 3 world. |
134 elif opts['dictiter'] and fn in ('iteritems', 'itervalues'): |
169 elif opts['dictiter'] and fn in ('iteritems', 'itervalues'): |
135 yield t._replace(string=fn[4:]) |
170 yield adjusttokenpos(t._replace(string=fn[4:]), coloffset) |
136 continue |
171 continue |
137 |
172 |
138 # Emit unmodified token. |
173 # Emit unmodified token. |
139 yield t |
174 yield adjusttokenpos(t, coloffset) |
140 |
175 |
def process(fin, fout, opts):
    """Read Python source from *fin*, rewrite its token stream with
    ``replacetokens`` under *opts*, and write the result to *fout*.

    *fin* must be a binary file-like object (``tokenize.tokenize`` reads
    raw bytes via ``fin.readline``); *fout* receives the untokenized
    bytes produced by ``tokenize.untokenize``.
    """
    raw = list(tokenize.tokenize(fin.readline))
    rewritten = replacetokens(raw, opts)
    fout.write(tokenize.untokenize(rewritten))