util: teach stringmatcher to handle forced case insensitive matches
authorMatt Harbison <matt_harbison@yahoo.com>
Wed, 11 Jan 2017 21:47:19 -0500
changeset 30773 c390b40fe1d7
parent 30772 b1012cb1bec3
child 30774 eaa5607132a2
util: teach stringmatcher to handle forced case insensitive matches The 'author' and 'desc' revsets are documented to be case insensitive. Unfortunately, this was implemented in 'author' by forcing the input to lowercase, including for regex like '\B'. (This actually inverts the meaning of the sequence.) For backward compatibility, we will keep that a case insensitive regex, but by using matcher options instead of brute force. This doesn't preclude future hypothetical 'icase-literal:' style prefixes that can be provided by the user. Such user specified cases can probably be handled up front by stripping 'icase-', setting the variable, and letting it drop through the existing code.
mercurial/util.py
--- a/mercurial/util.py	Wed Jan 11 23:13:51 2017 -0500
+++ b/mercurial/util.py	Wed Jan 11 21:47:19 2017 -0500
@@ -1996,7 +1996,7 @@
         start, stop = lower(date), upper(date)
         return lambda x: x >= start and x <= stop
 
-def stringmatcher(pattern):
+def stringmatcher(pattern, casesensitive=True):
     """
     accepts a string, possibly starting with 're:' or 'literal:' prefix.
     returns the matcher name, pattern, and matcher function.
@@ -2006,6 +2006,9 @@
     >>> def test(pattern, *tests):
     ...     kind, pattern, matcher = stringmatcher(pattern)
     ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
+    >>> def itest(pattern, *tests):
+    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
+    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
 
     exact matching (no prefix):
     >>> test('abcdefg', 'abc', 'def', 'abcdefg')
@@ -2022,18 +2025,35 @@
     unknown prefixes are ignored and treated as literals
     >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
     ('literal', 'foo:bar', [False, False, True])
+
+    case insensitive regex matches
+    >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
+    ('re', 'A.+b', [False, False, True])
+
+    case insensitive literal matches
+    >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
+    ('literal', 'ABCDEFG', [False, False, True])
     """
     if pattern.startswith('re:'):
         pattern = pattern[3:]
         try:
-            regex = remod.compile(pattern)
+            flags = 0
+            if not casesensitive:
+                flags = remod.I
+            regex = remod.compile(pattern, flags)
         except remod.error as e:
             raise error.ParseError(_('invalid regular expression: %s')
                                    % e)
         return 're', pattern, regex.search
     elif pattern.startswith('literal:'):
         pattern = pattern[8:]
-    return 'literal', pattern, pattern.__eq__
+
+    match = pattern.__eq__
+
+    if not casesensitive:
+        ipat = encoding.lower(pattern)
+        match = lambda s: ipat == encoding.lower(s)
+    return 'literal', pattern, match
 
 def shortuser(user):
     """Return a short representation of a user name or email address."""