151 st = tokens[j] |
151 st = tokens[j] |
152 if st.type == token.STRING and st.string.startswith(("'", '"')): |
152 if st.type == token.STRING and st.string.startswith(("'", '"')): |
153 tokens[j] = st._replace(string='u%s' % st.string) |
153 tokens[j] = st._replace(string='u%s' % st.string) |
154 |
154 |
155 for i, t in enumerate(tokens): |
155 for i, t in enumerate(tokens): |
156 # Convert most string literals to byte literals. String literals |
|
157 # in Python 2 are bytes. String literals in Python 3 are unicode. |
|
158 # Most strings in Mercurial are bytes and unicode strings are rare. |
|
159 # Rather than rewrite all string literals to use ``b''`` to indicate |
|
160 # byte strings, we apply this token transformer to insert the ``b`` |
|
161 # prefix nearly everywhere. |
|
162 if t.type == token.STRING: |
|
163 s = t.string |
|
164 |
|
165 # Preserve docstrings as string literals. This is inconsistent |
|
166 # with regular unprefixed strings. However, the |
|
167 # "from __future__" parsing (which allows a module docstring to |
|
168 # exist before it) doesn't properly handle the docstring if it |
|
169 # is b''' prefixed, leading to a SyntaxError. We leave all |
|
170 # docstrings as unprefixed to avoid this. This means Mercurial |
|
171 # components touching docstrings need to handle unicode, |
|
172 # unfortunately. |
|
173 if s[0:3] in ("'''", '"""'): |
|
174 yield t |
|
175 continue |
|
176 |
|
177 # If the first character isn't a quote, it is likely a string |
|
178 # prefixing character (such as 'b', 'u', or 'r'). Ignore. |
|
179 if s[0] not in ("'", '"'): |
|
180 yield t |
|
181 continue |
|
182 |
|
183 # String literal. Prefix to make a b'' string. |
|
184 yield t._replace(string='b%s' % t.string) |
|
185 continue |
|
186 |
|
187 # Insert compatibility imports at "from __future__ import" line. |
156 # Insert compatibility imports at "from __future__ import" line. |
188 # No '\n' should be added to preserve line numbers. |
157 # No '\n' should be added to preserve line numbers. |
189 if ( |
158 if ( |
190 t.type == token.NAME |
159 t.type == token.NAME |
191 and t.string == 'import' |
160 and t.string == 'import' |