ui: support quotes in configlist (issue2147)
author Henrik Stuart <hg@hstuart.dk>
Sun, 25 Apr 2010 17:38:41 +0200
changeset 10982 0a548640e012
parent 10981 c72358052e0b
child 10983 287a5cdf7743
ui: support quotes in configlist (issue2147) Several places that use ui.configlist, predominantly in authentication scenarios, need to interface with systems whose usernames can contain spaces (e.g. when client certificates are used as usernames, or with Windows usernames). This changeset introduces a parser that supports quoted strings and escaped quotation marks: a backslash-escaped quotation mark is decoded into a single literal quotation mark, following the usual behavior one would expect from quoting strings. The shlex module from the Python standard library is deliberately not used, as it raises an error if it cannot match quotation marks in the given input.
doc/hgrc.5.txt
mercurial/ui.py
tests/test-ui-config.py
tests/test-ui-config.py.out
--- a/doc/hgrc.5.txt	Sun Apr 25 17:11:50 2010 +0200
+++ b/doc/hgrc.5.txt	Sun Apr 25 17:38:41 2010 +0200
@@ -111,10 +111,18 @@
 section, if it has been set previously.
 
 The values are either free-form text strings, lists of text strings,
-or Boolean values. Lists are split on whitespace and commas. Boolean
-values can be set to true using any of "1", "yes", "true", or "on" and
-to false using "0", "no", "false", or "off" (all case insensitive).
+or Boolean values. Boolean values can be set to true using any of "1",
+"yes", "true", or "on" and to false using "0", "no", "false", or "off"
+(all case insensitive).
 
+List values are separated by whitespace or comma, except when values are
+placed in double quotation marks::
+
+  allow_read = "John Doe, PhD", brian, betty
+
+Quotation marks can be escaped by prefixing them with a backslash. Only
+quotation marks at the beginning of a word are counted as a quotation
+(e.g., ``foo"bar baz`` is the list of ``foo"bar`` and ``baz``).
 
 Sections
 --------
@@ -867,20 +875,18 @@
     push is not allowed. If the special value ``*``, any remote user can
     push, including unauthenticated users. Otherwise, the remote user
     must have been authenticated, and the authenticated user name must
-    be present in this list (separated by whitespace or ``,``). The
-    contents of the allow_push list are examined after the deny_push
-    list.
+    be present in this list. The contents of the allow_push list are
+    examined after the deny_push list.
 ``allow_read``
     If the user has not already been denied repository access due to
     the contents of deny_read, this list determines whether to grant
     repository access to the user. If this list is not empty, and the
-    user is unauthenticated or not present in the list (separated by
-    whitespace or ``,``), then access is denied for the user. If the
-    list is empty or not set, then access is permitted to all users by
-    default. Setting allow_read to the special value ``*`` is equivalent
-    to it not being set (i.e. access is permitted to all users). The
-    contents of the allow_read list are examined after the deny_read
-    list.
+    user is unauthenticated or not present in the list, then access is
+    denied for the user. If the list is empty or not set, then access
+    is permitted to all users by default. Setting allow_read to the
+    special value ``*`` is equivalent to it not being set (i.e. access
+    is permitted to all users). The contents of the allow_read list are
+    examined after the deny_read list.
 ``allowzip``
     (DEPRECATED) Whether to allow .zip downloading of repository
     revisions. Default is False. This feature creates temporary files.
@@ -915,17 +921,15 @@
     Whether to deny pushing to the repository. If empty or not set,
     push is not denied. If the special value ``*``, all remote users are
     denied push. Otherwise, unauthenticated users are all denied, and
-    any authenticated user name present in this list (separated by
-    whitespace or ``,``) is also denied. The contents of the deny_push
-    list are examined before the allow_push list.
+    any authenticated user name present in this list is also denied. The
+    contents of the deny_push list are examined before the allow_push list.
 ``deny_read``
     Whether to deny reading/viewing of the repository. If this list is
     not empty, unauthenticated users are all denied, and any
-    authenticated user name present in this list (separated by
-    whitespace or ``,``) is also denied access to the repository. If set
-    to the special value ``*``, all remote users are denied access
-    (rarely needed ;). If deny_read is empty or not set, the
-    determination of repository access depends on the presence and
+    authenticated user name present in this list is also denied access to
+    the repository. If set to the special value ``*``, all remote users
+    are denied access (rarely needed ;). If deny_read is empty or not set,
+    the determination of repository access depends on the presence and
     content of the allow_read list (see description). If both
     deny_read and allow_read are empty or not set, then access is
     permitted to all users by default. If the repository is being
--- a/mercurial/ui.py	Sun Apr 25 17:11:50 2010 +0200
+++ b/mercurial/ui.py	Sun Apr 25 17:38:41 2010 +0200
@@ -154,11 +154,82 @@
 
     def configlist(self, section, name, default=None, untrusted=False):
         """Return a list of comma/space separated strings"""
+
+        def _parse_plain(parts, s, offset):
+            whitespace = False
+            while offset < len(s) and (s[offset].isspace() or s[offset] == ','):
+                whitespace = True
+                offset += 1
+            if offset >= len(s):
+                return None, parts, offset
+            if whitespace:
+                parts.append('')
+            if s[offset] == '"' and not parts[-1]:
+                return _parse_quote, parts, offset + 1
+            elif s[offset] == '"' and parts[-1][-1] == '\\':
+                parts[-1] = parts[-1][:-1] + s[offset]
+                return _parse_plain, parts, offset + 1
+            parts[-1] += s[offset]
+            return _parse_plain, parts, offset + 1
+
+        def _parse_quote(parts, s, offset):
+            if offset < len(s) and s[offset] == '"': # ""
+                parts.append('')
+                offset += 1
+                while offset < len(s) and (s[offset].isspace() or
+                        s[offset] == ','):
+                    offset += 1
+                return _parse_plain, parts, offset
+
+            while offset < len(s) and s[offset] != '"':
+                if s[offset] == '\\' and offset + 1 < len(s) and s[offset + 1] == '"':
+                    offset += 1
+                    parts[-1] += '"'
+                else:
+                    parts[-1] += s[offset]
+                offset += 1
+
+            if offset >= len(s):
+                real_parts = _configlist(parts[-1])
+                if not real_parts:
+                    parts[-1] = '"'
+                else:
+                    real_parts[0] = '"' + real_parts[0]
+                    parts = parts[:-1]
+                    parts.extend(real_parts)
+                return None, parts, offset
+
+            offset += 1
+            while offset < len(s) and s[offset] in [' ', ',']:
+                offset += 1
+
+            if offset < len(s):
+                if offset + 1 == len(s) and s[offset] == '"':
+                    parts[-1] += '"'
+                    offset += 1
+                else:
+                    parts.append('')
+            else:
+                return None, parts, offset
+
+            return _parse_plain, parts, offset
+
+        def _configlist(s):
+            s = s.rstrip(' ,')
+            if not s:
+                return None
+            parser, parts, offset = _parse_plain, [''], 0
+            while parser:
+                parser, parts, offset = parser(parts, s, offset)
+            return parts
+
         result = self.config(section, name, untrusted=untrusted)
         if result is None:
             result = default or []
         if isinstance(result, basestring):
-            result = result.replace(",", " ").split()
+            result = _configlist(result)
+            if result is None:
+                result = default or []
         return result
 
     def has_section(self, section, untrusted=False):
--- a/tests/test-ui-config.py	Sun Apr 25 17:11:50 2010 +0200
+++ b/tests/test-ui-config.py	Sun Apr 25 17:38:41 2010 +0200
@@ -11,6 +11,19 @@
     'lists.list2=foo bar baz',
     'lists.list3=alice, bob',
     'lists.list4=foo bar baz alice, bob',
+    'lists.list5=abc d"ef"g "hij def"',
+    'lists.list6="hello world", "how are you?"',
+    'lists.list7=Do"Not"Separate',
+    'lists.list8="Do"Separate',
+    'lists.list9="Do\\"NotSeparate"',
+    'lists.list10=string "with extraneous" quotation mark"',
+    'lists.list11=x, y',
+    'lists.list12="x", "y"',
+    'lists.list13=""" key = "x", "y" """',
+    'lists.list14=,,,,     ',
+    'lists.list15=" just with starting quotation',
+    'lists.list16="longer quotation" with "no ending quotation',
+    'lists.list17=this is \\" "not a quotation mark"',
 ])
 
 print repr(testui.configitems('values'))
@@ -36,6 +49,19 @@
 print repr(testui.configlist('lists', 'list3'))
 print repr(testui.configlist('lists', 'list4'))
 print repr(testui.configlist('lists', 'list4', ['foo']))
+print repr(testui.configlist('lists', 'list5'))
+print repr(testui.configlist('lists', 'list6'))
+print repr(testui.configlist('lists', 'list7'))
+print repr(testui.configlist('lists', 'list8'))
+print repr(testui.configlist('lists', 'list9'))
+print repr(testui.configlist('lists', 'list10'))
+print repr(testui.configlist('lists', 'list11'))
+print repr(testui.configlist('lists', 'list12'))
+print repr(testui.configlist('lists', 'list13'))
+print repr(testui.configlist('lists', 'list14'))
+print repr(testui.configlist('lists', 'list15'))
+print repr(testui.configlist('lists', 'list16'))
+print repr(testui.configlist('lists', 'list17'))
 print repr(testui.configlist('lists', 'unknown'))
 print repr(testui.configlist('lists', 'unknown', ''))
 print repr(testui.configlist('lists', 'unknown', 'foo'))
--- a/tests/test-ui-config.py.out	Sun Apr 25 17:11:50 2010 +0200
+++ b/tests/test-ui-config.py.out	Sun Apr 25 17:38:41 2010 +0200
@@ -1,5 +1,5 @@
 [('string', 'string value'), ('bool1', 'true'), ('bool2', 'false')]
-[('list1', 'foo'), ('list2', 'foo bar baz'), ('list3', 'alice, bob'), ('list4', 'foo bar baz alice, bob')]
+[('list1', 'foo'), ('list2', 'foo bar baz'), ('list3', 'alice, bob'), ('list4', 'foo bar baz alice, bob'), ('list5', 'abc d"ef"g "hij def"'), ('list6', '"hello world", "how are you?"'), ('list7', 'Do"Not"Separate'), ('list8', '"Do"Separate'), ('list9', '"Do\\"NotSeparate"'), ('list10', 'string "with extraneous" quotation mark"'), ('list11', 'x, y'), ('list12', '"x", "y"'), ('list13', '""" key = "x", "y" """'), ('list14', ',,,,     '), ('list15', '" just with starting quotation'), ('list16', '"longer quotation" with "no ending quotation'), ('list17', 'this is \\" "not a quotation mark"')]
 ---
 'string value'
 'true'
@@ -18,6 +18,19 @@
 ['alice', 'bob']
 ['foo', 'bar', 'baz', 'alice', 'bob']
 ['foo', 'bar', 'baz', 'alice', 'bob']
+['abc', 'd"ef"g', 'hij def']
+['hello world', 'how are you?']
+['Do"Not"Separate']
+['Do', 'Separate']
+['Do"NotSeparate']
+['string', 'with extraneous', 'quotation', 'mark"']
+['x', 'y']
+['x', 'y']
+['', ' key = ', 'x"', 'y', '', '"']
+[]
+['"', 'just', 'with', 'starting', 'quotation']
+['longer quotation', 'with', '"no', 'ending', 'quotation']
+['this', 'is', '"', 'not a quotation mark']
 []
 []
 ['foo']