wireproto: convert human output frames to CBOR
authorGregory Szorc <gregory.szorc@gmail.com>
Fri, 30 Mar 2018 14:52:32 -0700
changeset 37319 36d17f37db91
parent 37318 9954d0e2ad00
child 37320 39f7d4ee8bcd
wireproto: convert human output frames to CBOR This is easier than rolling our own encoding format. As a bonus, some of our artificial limits around lengths of things went away because we are no longer using fixed length fields to hold sizes. Differential Revision: https://phab.mercurial-scm.org/D3067
mercurial/help/internals/wireprotocol.txt
mercurial/wireprotoframing.py
tests/test-wireproto-serverreactor.py
--- a/mercurial/help/internals/wireprotocol.txt	Mon Apr 02 17:06:42 2018 +0530
+++ b/mercurial/help/internals/wireprotocol.txt	Fri Mar 30 14:52:32 2018 -0700
@@ -699,26 +699,16 @@
 formatting strings with additional formatters, hence why ``%%`` is
 required to represent the literal ``%``.
 
-The raw frame consists of a series of data structures representing
-textual atoms to print. Each atom begins with a struct defining the
-size of the data that follows:
+The frame payload consists of a CBOR array of CBOR maps. Each map
+defines an *atom* of text data to print. Each *atom* has the following
+bytestring keys:
 
-* A 16-bit little endian unsigned integer denoting the length of the
-  formatting string.
-* An 8-bit unsigned integer denoting the number of label strings
-  that follow.
-* An 8-bit unsigned integer denoting the number of formatting string
-  arguments strings that follow.
-* An array of 8-bit unsigned integers denoting the lengths of
-  *labels* data.
-* An array of 16-bit unsigned integers denoting the lengths of
-  formatting strings.
-* The formatting string, encoded as UTF-8.
-* 0 or more ASCII strings defining labels to apply to this atom.
-* 0 or more UTF-8 strings that will be used as arguments to the
-  formatting string.
-
-TODO use ASCII for formatting string.
+msg
+   (bytestring) The formatting string. Content MUST be ASCII.
+args (optional)
+   Array of bytestrings defining arguments to the formatting string.
+labels (optional)
+   Array of bytestrings defining labels to apply to this atom.
 
 All data to be printed MUST be encoded into a single frame: this frame
 does not support spanning data across multiple frames.
--- a/mercurial/wireprotoframing.py	Mon Apr 02 17:06:42 2018 +0530
+++ b/mercurial/wireprotoframing.py	Fri Mar 30 14:52:32 2018 -0700
@@ -395,7 +395,8 @@
                            flags=flags,
                            payload=msg)
 
-def createtextoutputframe(stream, requestid, atoms):
+def createtextoutputframe(stream, requestid, atoms,
+                          maxframesize=DEFAULT_MAX_FRAME_SIZE):
     """Create a text output frame to render text to people.
 
     ``atoms`` is a 3-tuple of (formatting string, args, labels).
@@ -405,15 +406,9 @@
     formatters to be applied at rendering time. In terms of the ``ui``
     class, each atom corresponds to a ``ui.write()``.
     """
-    bytesleft = DEFAULT_MAX_FRAME_SIZE
-    atomchunks = []
+    atomdicts = []
 
     for (formatting, args, labels) in atoms:
-        if len(args) > 255:
-            raise ValueError('cannot use more than 255 formatting arguments')
-        if len(labels) > 255:
-            raise ValueError('cannot use more than 255 labels')
-
         # TODO look for localstr, other types here?
 
         if not isinstance(formatting, bytes):
@@ -425,8 +420,8 @@
             if not isinstance(label, bytes):
                 raise ValueError('must use bytes for labels')
 
-        # Formatting string must be UTF-8.
-        formatting = formatting.decode(r'utf-8', r'replace').encode(r'utf-8')
+        # Formatting string must be ASCII.
+        formatting = formatting.decode(r'ascii', r'replace').encode(r'ascii')
 
         # Arguments must be UTF-8.
         args = [a.decode(r'utf-8', r'replace').encode(r'utf-8') for a in args]
@@ -435,36 +430,23 @@
         labels = [l.decode(r'ascii', r'strict').encode(r'ascii')
                   for l in labels]
 
-        if len(formatting) > 65535:
-            raise ValueError('formatting string cannot be longer than 64k')
-
-        if any(len(a) > 65535 for a in args):
-            raise ValueError('argument string cannot be longer than 64k')
-
-        if any(len(l) > 255 for l in labels):
-            raise ValueError('label string cannot be longer than 255 bytes')
+        atom = {b'msg': formatting}
+        if args:
+            atom[b'args'] = args
+        if labels:
+            atom[b'labels'] = labels
 
-        chunks = [
-            struct.pack(r'<H', len(formatting)),
-            struct.pack(r'<BB', len(labels), len(args)),
-            struct.pack(r'<' + r'B' * len(labels), *map(len, labels)),
-            struct.pack(r'<' + r'H' * len(args), *map(len, args)),
-        ]
-        chunks.append(formatting)
-        chunks.extend(labels)
-        chunks.extend(args)
+        atomdicts.append(atom)
 
-        atom = b''.join(chunks)
-        atomchunks.append(atom)
-        bytesleft -= len(atom)
+    payload = cbor.dumps(atomdicts, canonical=True)
 
-    if bytesleft < 0:
+    if len(payload) > maxframesize:
         raise ValueError('cannot encode data in a single frame')
 
     yield stream.makeframe(requestid=requestid,
                            typeid=FRAME_TYPE_TEXT_OUTPUT,
                            flags=0,
-                           payload=b''.join(atomchunks))
+                           payload=payload)
 
 class stream(object):
     """Represents a logical unidirectional series of frames."""
--- a/tests/test-wireproto-serverreactor.py	Mon Apr 02 17:06:42 2018 +0530
+++ b/tests/test-wireproto-serverreactor.py	Fri Mar 30 14:52:32 2018 -0700
@@ -136,22 +136,6 @@
             ffs(b'1 1 0 command-data eos %s' % data.getvalue()),
         ])
 
-    def testtextoutputexcessiveargs(self):
-        """At most 255 formatting arguments are allowed."""
-        with self.assertRaisesRegexp(ValueError,
-                                     'cannot use more than 255 formatting'):
-            args = [b'x' for i in range(256)]
-            list(framing.createtextoutputframe(None, 1,
-                                               [(b'bleh', args, [])]))
-
-    def testtextoutputexcessivelabels(self):
-        """At most 255 labels are allowed."""
-        with self.assertRaisesRegexp(ValueError,
-                                     'cannot use more than 255 labels'):
-            labels = [b'l' for i in range(256)]
-            list(framing.createtextoutputframe(None, 1,
-                                               [(b'bleh', [], labels)]))
-
     def testtextoutputformattingstringtype(self):
         """Formatting string must be bytes."""
         with self.assertRaisesRegexp(ValueError, 'must use bytes formatting '):
@@ -168,31 +152,14 @@
             list(framing.createtextoutputframe(None, 1, [
                 (b'foo', [], [b'foo'.decode('ascii')])]))
 
-    def testtextoutputtoolongformatstring(self):
-        with self.assertRaisesRegexp(ValueError,
-                                     'formatting string cannot be longer than'):
-            list(framing.createtextoutputframe(None, 1, [
-                (b'x' * 65536, [], [])]))
-
-    def testtextoutputtoolongargumentstring(self):
-        with self.assertRaisesRegexp(ValueError,
-                                     'argument string cannot be longer than'):
-            list(framing.createtextoutputframe(None, 1, [
-                (b'bleh', [b'x' * 65536], [])]))
-
-    def testtextoutputtoolonglabelstring(self):
-        with self.assertRaisesRegexp(ValueError,
-                                     'label string cannot be longer than'):
-            list(framing.createtextoutputframe(None, 1, [
-                (b'bleh', [], [b'x' * 65536])]))
-
     def testtextoutput1simpleatom(self):
         stream = framing.stream(1)
         val = list(framing.createtextoutputframe(stream, 1, [
             (b'foo', [], [])]))
 
         self.assertEqual(val, [
-            ffs(br'1 1 stream-begin text-output 0 \x03\x00\x00\x00foo'),
+            ffs(b'1 1 stream-begin text-output 0 '
+                b"cbor:[{b'msg': b'foo'}]"),
         ])
 
     def testtextoutput2simpleatoms(self):
@@ -203,8 +170,8 @@
         ]))
 
         self.assertEqual(val, [
-            ffs(br'1 1 stream-begin text-output 0 '
-                br'\x03\x00\x00\x00foo\x03\x00\x00\x00bar'),
+            ffs(b'1 1 stream-begin text-output 0 '
+                b"cbor:[{b'msg': b'foo'}, {b'msg': b'bar'}]")
         ])
 
     def testtextoutput1arg(self):
@@ -214,8 +181,8 @@
         ]))
 
         self.assertEqual(val, [
-            ffs(br'1 1 stream-begin text-output 0 '
-                br'\x06\x00\x00\x01\x04\x00foo %sval1'),
+            ffs(b'1 1 stream-begin text-output 0 '
+                b"cbor:[{b'msg': b'foo %s', b'args': [b'val1']}]")
         ])
 
     def testtextoutput2arg(self):
@@ -225,8 +192,8 @@
         ]))
 
         self.assertEqual(val, [
-            ffs(br'1 1 stream-begin text-output 0 '
-                br'\x09\x00\x00\x02\x03\x00\x05\x00foo %s %svalvalue'),
+            ffs(b'1 1 stream-begin text-output 0 '
+                b"cbor:[{b'msg': b'foo %s %s', b'args': [b'val', b'value']}]")
         ])
 
     def testtextoutput1label(self):
@@ -236,8 +203,8 @@
         ]))
 
         self.assertEqual(val, [
-            ffs(br'1 1 stream-begin text-output 0 '
-                br'\x03\x00\x01\x00\x05foolabel'),
+            ffs(b'1 1 stream-begin text-output 0 '
+                b"cbor:[{b'msg': b'foo', b'labels': [b'label']}]")
         ])
 
     def testargandlabel(self):
@@ -247,8 +214,9 @@
         ]))
 
         self.assertEqual(val, [
-            ffs(br'1 1 stream-begin text-output 0 '
-                br'\x06\x00\x01\x01\x05\x03\x00foo %slabelarg'),
+            ffs(b'1 1 stream-begin text-output 0 '
+                b"cbor:[{b'msg': b'foo %s', b'args': [b'arg'], "
+                b"b'labels': [b'label']}]")
         ])
 
 class ServerReactorTests(unittest.TestCase):