match: move util match functions over
author Matt Mackall <mpm@selenic.com>
Sun, 24 May 2009 02:56:14 -0500
changeset 8570 7fe2012b3bd0
parent 8569 4fadac101818
child 8571 9f12e1a27a1b
match: move util match functions over
mercurial/match.py
mercurial/util.py
--- a/mercurial/match.py	Sun May 24 02:56:14 2009 -0500
+++ b/mercurial/match.py	Sun May 24 02:56:14 2009 -0500
@@ -5,7 +5,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2, incorporated herein by reference.
 
-import util
+import util, re
 
 class _match(object):
     def __init__(self, root, cwd, files, mf, ap):
@@ -50,10 +50,203 @@
 class match(_match):
     def __init__(self, root, cwd, patterns, include=[], exclude=[],
                  default='glob'):
-        f, mf, ap = util.matcher(root, cwd, patterns, include, exclude,
-                                 default)
+        f, mf, ap = _matcher(root, cwd, patterns, include, exclude, default)
         _match.__init__(self, root, cwd, f, mf, ap)
 
 def patkind(pat):
-    return util._patsplit(pat, None)[0]
+    return _patsplit(pat, None)[0]
+
+def _patsplit(pat, default):
+    """Split a string into an optional pattern kind prefix and the
+    actual pattern."""
+    for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
+        if pat.startswith(prefix + ':'): return pat.split(':', 1)
+    return default, pat
+
+_globchars = set('[{*?')
+
+def _globre(pat, head='^', tail='$'):
+    "convert a glob pattern into a regexp"
+    i, n = 0, len(pat)
+    res = ''
+    group = 0
+    def peek(): return i < n and pat[i]
+    while i < n:
+        c = pat[i]
+        i = i+1
+        if c == '*':
+            if peek() == '*':
+                i += 1
+                res += '.*'
+            else:
+                res += '[^/]*'
+        elif c == '?':
+            res += '.'
+        elif c == '[':
+            j = i
+            if j < n and pat[j] in '!]':
+                j += 1
+            while j < n and pat[j] != ']':
+                j += 1
+            if j >= n:
+                res += '\\['
+            else:
+                stuff = pat[i:j].replace('\\','\\\\')
+                i = j + 1
+                if stuff[0] == '!':
+                    stuff = '^' + stuff[1:]
+                elif stuff[0] == '^':
+                    stuff = '\\' + stuff
+                res = '%s[%s]' % (res, stuff)
+        elif c == '{':
+            group += 1
+            res += '(?:'
+        elif c == '}' and group:
+            res += ')'
+            group -= 1
+        elif c == ',' and group:
+            res += '|'
+        elif c == '\\':
+            p = peek()
+            if p:
+                i += 1
+                res += re.escape(p)
+            else:
+                res += re.escape(c)
+        else:
+            res += re.escape(c)
+    return head + res + tail
+
+def _matcher(canonroot, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
+    """build a function to match a set of file patterns
+
+    arguments:
+    canonroot - the canonical root of the tree you're matching against
+    cwd - the current working directory, if relevant
+    names - patterns to find
+    inc - patterns to include
+    exc - patterns to exclude
+    dflt_pat - if a pattern in names has no explicit type, assume this one
+
+    a pattern is one of:
+    'glob:<glob>' - a glob relative to cwd
+    're:<regexp>' - a regular expression
+    'path:<path>' - a path relative to canonroot
+    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
+    'relpath:<path>' - a path relative to cwd
+    'relre:<regexp>' - a regexp that doesn't have to match the start of a name
+    '<something>' - one of the cases above, selected by the dflt_pat argument
+
+    returns:
+    a 3-tuple containing
+    - list of roots (places where one should start a recursive walk of the fs);
+      this often matches the explicit non-pattern names passed in, but also
+      includes the initial part of glob: patterns that has no glob characters
+    - a bool match(filename) function
+    - a bool indicating if any patterns were passed in
+    """
+
+    # a common case: no patterns at all
+    if not names and not inc and not exc:
+        return [], util.always, False
 
+    def contains_glob(name):
+        for c in name:
+            if c in _globchars: return True
+        return False
+
+    def regex(kind, name, tail):
+        '''convert a pattern into a regular expression'''
+        if not name:
+            return ''
+        if kind == 're':
+            return name
+        elif kind == 'path':
+            return '^' + re.escape(name) + '(?:/|$)'
+        elif kind == 'relglob':
+            return _globre(name, '(?:|.*/)', tail)
+        elif kind == 'relpath':
+            return re.escape(name) + '(?:/|$)'
+        elif kind == 'relre':
+            if name.startswith('^'):
+                return name
+            return '.*' + name
+        return _globre(name, '', tail)
+
+    def matchfn(pats, tail):
+        """build a matching function from a set of patterns"""
+        if not pats:
+            return
+        try:
+            pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
+            if len(pat) > 20000:
+                raise OverflowError()
+            return re.compile(pat).match
+        except OverflowError:
+            # We're using a Python with a tiny regex engine and we
+            # made it explode, so we'll divide the pattern list in two
+            # until it works
+            l = len(pats)
+            if l < 2:
+                raise
+            a, b = matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail)
+            return lambda s: a(s) or b(s)
+        except re.error:
+            for k, p in pats:
+                try:
+                    re.compile('(?:%s)' % regex(k, p, tail))
+                except re.error:
+                    raise util.Abort("invalid pattern (%s): %s" % (k, p))
+            raise util.Abort("invalid pattern")
+
+    def globprefix(pat):
+        '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
+        root = []
+        for p in pat.split('/'):
+            if contains_glob(p): break
+            root.append(p)
+        return '/'.join(root) or '.'
+
+    def normalizepats(names, default):
+        pats = []
+        roots = []
+        anypats = False
+        for kind, name in [_patsplit(p, default) for p in names]:
+            if kind in ('glob', 'relpath'):
+                name = util.canonpath(canonroot, cwd, name)
+            elif kind in ('relglob', 'path'):
+                name = util.normpath(name)
+
+            pats.append((kind, name))
+
+            if kind in ('glob', 're', 'relglob', 'relre'):
+                anypats = True
+
+            if kind == 'glob':
+                root = globprefix(name)
+                roots.append(root)
+            elif kind in ('relpath', 'path'):
+                roots.append(name or '.')
+            elif kind == 'relglob':
+                roots.append('.')
+        return roots, pats, anypats
+
+    roots, pats, anypats = normalizepats(names, dflt_pat)
+
+    patmatch = matchfn(pats, '$') or util.always
+    incmatch = util.always
+    if inc:
+        dummy, inckinds, dummy = normalizepats(inc, 'glob')
+        incmatch = matchfn(inckinds, '(?:/|$)')
+    excmatch = util.never
+    if exc:
+        dummy, exckinds, dummy = normalizepats(exc, 'glob')
+        excmatch = matchfn(exckinds, '(?:/|$)')
+
+    if not names and inc and not exc:
+        # common case: hgignore patterns
+        matcher = incmatch
+    else:
+        matcher = lambda fn: incmatch(fn) and not excmatch(fn) and patmatch(fn)
+
+    return (roots, matcher, (inc or exc or anypats) and True)
--- a/mercurial/util.py	Sun May 24 02:56:14 2009 -0500
+++ b/mercurial/util.py	Sun May 24 02:56:14 2009 -0500
@@ -207,67 +207,6 @@
 def always(fn): return True
 def never(fn): return False
 
-def _patsplit(pat, default):
-    """Split a string into an optional pattern kind prefix and the
-    actual pattern."""
-    for prefix in 're', 'glob', 'path', 'relglob', 'relpath', 'relre':
-        if pat.startswith(prefix + ':'): return pat.split(':', 1)
-    return default, pat
-
-def _globre(pat, head='^', tail='$'):
-    "convert a glob pattern into a regexp"
-    i, n = 0, len(pat)
-    res = ''
-    group = 0
-    def peek(): return i < n and pat[i]
-    while i < n:
-        c = pat[i]
-        i = i+1
-        if c == '*':
-            if peek() == '*':
-                i += 1
-                res += '.*'
-            else:
-                res += '[^/]*'
-        elif c == '?':
-            res += '.'
-        elif c == '[':
-            j = i
-            if j < n and pat[j] in '!]':
-                j += 1
-            while j < n and pat[j] != ']':
-                j += 1
-            if j >= n:
-                res += '\\['
-            else:
-                stuff = pat[i:j].replace('\\','\\\\')
-                i = j + 1
-                if stuff[0] == '!':
-                    stuff = '^' + stuff[1:]
-                elif stuff[0] == '^':
-                    stuff = '\\' + stuff
-                res = '%s[%s]' % (res, stuff)
-        elif c == '{':
-            group += 1
-            res += '(?:'
-        elif c == '}' and group:
-            res += ')'
-            group -= 1
-        elif c == ',' and group:
-            res += '|'
-        elif c == '\\':
-            p = peek()
-            if p:
-                i += 1
-                res += re.escape(p)
-            else:
-                res += re.escape(c)
-        else:
-            res += re.escape(c)
-    return head + res + tail
-
-_globchars = set('[{*?')
-
 def pathto(root, n1, n2):
     '''return the relative path from one place to another.
     root should use os.sep to separate directories
@@ -342,140 +281,6 @@
 
         raise Abort('%s not under root' % myname)
 
-def matcher(canonroot, cwd='', names=[], inc=[], exc=[], dflt_pat='glob'):
-    """build a function to match a set of file patterns
-
-    arguments:
-    canonroot - the canonical root of the tree you're matching against
-    cwd - the current working directory, if relevant
-    names - patterns to find
-    inc - patterns to include
-    exc - patterns to exclude
-    dflt_pat - if a pattern in names has no explicit type, assume this one
-
-    a pattern is one of:
-    'glob:<glob>' - a glob relative to cwd
-    're:<regexp>' - a regular expression
-    'path:<path>' - a path relative to canonroot
-    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
-    'relpath:<path>' - a path relative to cwd
-    'relre:<regexp>' - a regexp that doesn't have to match the start of a name
-    '<something>' - one of the cases above, selected by the dflt_pat argument
-
-    returns:
-    a 3-tuple containing
-    - list of roots (places where one should start a recursive walk of the fs);
-      this often matches the explicit non-pattern names passed in, but also
-      includes the initial part of glob: patterns that has no glob characters
-    - a bool match(filename) function
-    - a bool indicating if any patterns were passed in
-    """
-
-    # a common case: no patterns at all
-    if not names and not inc and not exc:
-        return [], always, False
-
-    def contains_glob(name):
-        for c in name:
-            if c in _globchars: return True
-        return False
-
-    def regex(kind, name, tail):
-        '''convert a pattern into a regular expression'''
-        if not name:
-            return ''
-        if kind == 're':
-            return name
-        elif kind == 'path':
-            return '^' + re.escape(name) + '(?:/|$)'
-        elif kind == 'relglob':
-            return _globre(name, '(?:|.*/)', tail)
-        elif kind == 'relpath':
-            return re.escape(name) + '(?:/|$)'
-        elif kind == 'relre':
-            if name.startswith('^'):
-                return name
-            return '.*' + name
-        return _globre(name, '', tail)
-
-    def matchfn(pats, tail):
-        """build a matching function from a set of patterns"""
-        if not pats:
-            return
-        try:
-            pat = '(?:%s)' % '|'.join([regex(k, p, tail) for (k, p) in pats])
-            if len(pat) > 20000:
-                raise OverflowError()
-            return re.compile(pat).match
-        except OverflowError:
-            # We're using a Python with a tiny regex engine and we
-            # made it explode, so we'll divide the pattern list in two
-            # until it works
-            l = len(pats)
-            if l < 2:
-                raise
-            a, b = matchfn(pats[:l//2], tail), matchfn(pats[l//2:], tail)
-            return lambda s: a(s) or b(s)
-        except re.error:
-            for k, p in pats:
-                try:
-                    re.compile('(?:%s)' % regex(k, p, tail))
-                except re.error:
-                    raise Abort("invalid pattern (%s): %s" % (k, p))
-            raise Abort("invalid pattern")
-
-    def globprefix(pat):
-        '''return the non-glob prefix of a path, e.g. foo/* -> foo'''
-        root = []
-        for p in pat.split('/'):
-            if contains_glob(p): break
-            root.append(p)
-        return '/'.join(root) or '.'
-
-    def normalizepats(names, default):
-        pats = []
-        roots = []
-        anypats = False
-        for kind, name in [_patsplit(p, default) for p in names]:
-            if kind in ('glob', 'relpath'):
-                name = canonpath(canonroot, cwd, name)
-            elif kind in ('relglob', 'path'):
-                name = normpath(name)
-
-            pats.append((kind, name))
-
-            if kind in ('glob', 're', 'relglob', 'relre'):
-                anypats = True
-
-            if kind == 'glob':
-                root = globprefix(name)
-                roots.append(root)
-            elif kind in ('relpath', 'path'):
-                roots.append(name or '.')
-            elif kind == 'relglob':
-                roots.append('.')
-        return roots, pats, anypats
-
-    roots, pats, anypats = normalizepats(names, dflt_pat)
-
-    patmatch = matchfn(pats, '$') or always
-    incmatch = always
-    if inc:
-        dummy, inckinds, dummy = normalizepats(inc, 'glob')
-        incmatch = matchfn(inckinds, '(?:/|$)')
-    excmatch = never
-    if exc:
-        dummy, exckinds, dummy = normalizepats(exc, 'glob')
-        excmatch = matchfn(exckinds, '(?:/|$)')
-
-    if not names and inc and not exc:
-        # common case: hgignore patterns
-        match = incmatch
-    else:
-        match = lambda fn: incmatch(fn) and not excmatch(fn) and patmatch(fn)
-
-    return (roots, match, (inc or exc or anypats) and True)
-
 _hgexecutable = None
 
 def main_is_frozen():