wireproto: convert python literal to object without using unsafe eval()
author Yuya Nishihara <yuya@tcha.org>
Sun, 08 Apr 2018 11:55:46 +0900
changeset 37476 e9dea82ea1f3
parent 37475 152f1b47e0ad
child 37477 28e8c5949109
wireproto: convert python literal to object without using unsafe eval() Follows up cc5a040fe150. At this point, I don't think we need a real eval(). If we want to support a set literal, maybe we can vendor ast.literal_eval(), which is a relatively simple function.
mercurial/utils/stringutil.py
mercurial/wireprotoframing.py
tests/test-wireproto-serverreactor.py
--- a/mercurial/utils/stringutil.py	Sun Apr 08 12:30:59 2018 +0900
+++ b/mercurial/utils/stringutil.py	Sun Apr 08 11:55:46 2018 +0900
@@ -9,7 +9,7 @@
 
 from __future__ import absolute_import
 
-import __future__
+import ast
 import codecs
 import re as remod
 import textwrap
@@ -499,28 +499,7 @@
     """
     return _booleans.get(s.lower(), None)
 
-def evalpython(s):
-    """Evaluate a string containing a Python expression.
-
-    THIS FUNCTION IS NOT SAFE TO USE ON UNTRUSTED INPUT. IT'S USE SHOULD BE
-    LIMITED TO DEVELOPER-FACING FUNCTIONALITY.
-    """
-    globs = {
-        r'__builtins__': {
-            r'None': None,
-            r'False': False,
-            r'True': True,
-            r'int': int,
-            r'set': set,
-            r'tuple': tuple,
-            # Don't need to expose dict and list because we can use
-            # literals.
-        },
-    }
-
-    # We can't use eval() directly because it inherits compiler
-    # flags from this module and we need unicode literals for Python 3
-    # compatibility.
-    code = compile(s, r'<string>', r'eval',
-                   __future__.unicode_literals.compiler_flag, True)
-    return eval(code, globs, {})
+def evalpythonliteral(s):
+    """Evaluate a string containing a Python literal expression"""
+    # We could backport our tokenizer hack to rewrite '' to u'' if we want
+    return ast.literal_eval(s)
--- a/mercurial/wireprotoframing.py	Sun Apr 08 12:30:59 2018 +0900
+++ b/mercurial/wireprotoframing.py	Sun Apr 08 11:55:46 2018 +0900
@@ -180,9 +180,6 @@
 def makeframefromhumanstring(s):
     """Create a frame from a human readable string
 
-    DANGER: NOT SAFE TO USE WITH UNTRUSTED INPUT BECAUSE OF POTENTIAL
-    eval() USAGE. DO NOT USE IN CORE.
-
     Strings have the form:
 
         <request-id> <stream-id> <stream-flags> <type> <flags> <payload>
@@ -198,7 +195,7 @@
     Flags can be delimited by `|` to bitwise OR them together.
 
     If the payload begins with ``cbor:``, the following string will be
-    evaluated as Python code and the resulting object will be fed into
+    evaluated as Python literal and the resulting object will be fed into
     a CBOR encoder. Otherwise, the payload is interpreted as a Python
     byte string literal.
     """
@@ -229,7 +226,8 @@
             finalflags |= int(flag)
 
     if payload.startswith(b'cbor:'):
-        payload = cbor.dumps(stringutil.evalpython(payload[5:]), canonical=True)
+        payload = cbor.dumps(stringutil.evalpythonliteral(payload[5:]),
+                             canonical=True)
 
     else:
         payload = stringutil.unescapestr(payload)
--- a/tests/test-wireproto-serverreactor.py	Sun Apr 08 12:30:59 2018 +0900
+++ b/tests/test-wireproto-serverreactor.py	Sun Apr 08 11:55:46 2018 +0900
@@ -70,10 +70,6 @@
                          b'\x05\x00\x00\x01\x00\x01\x00\x10:\x00\x05:\r')
 
     def testcborstrings(self):
-        # String literals should be unicode.
-        self.assertEqual(ffs(b"1 1 0 1 0 cbor:'foo'"),
-                         b'\x04\x00\x00\x01\x00\x01\x00\x10cfoo')
-
         self.assertEqual(ffs(b"1 1 0 1 0 cbor:b'foo'"),
                          b'\x04\x00\x00\x01\x00\x01\x00\x10Cfoo')