mercurial: comparison mercurial/utils/stringutil.py

equal deleted inserted replaced

-:2506c3ac73f4
+:bbbb5213d043
 import codecs
 import re as remod
 import textwrap
 import types
+from typing import (
+Optional,
+overload,
+)
 from ..i18n import _
 from ..thirdparty import attr
 from .. import (
 encoding,
 # regex special chars pulled from https://bugs.python.org/issue29995
 # which was part of Python 3.7.
 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
+@overload
+def reescape(pat: bytes) -> bytes:
+...
+@overload
+def reescape(pat: str) -> str:
+...
 def reescape(pat):
 """Drop-in replacement for re.escape."""
 # NOTE: it is intentional that this works on unicodes and not
 if wantuni:
 return pat
 return pat.encode('latin1')
-def pprint(o, bprefix=False, indent=0, level=0):
+def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes:
 """Pretty print an object."""
 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
-def pprintgen(o, bprefix=False, indent=0, level=0):
+def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0):
 """Pretty print an object to a generator of atoms.
 ``bprefix`` is a flag influencing whether bytestrings are rendered with
 a ``b''`` prefix.
 yield b']'
 else:
 yield pycompat.byterepr(o)
-def prettyrepr(o):
+def prettyrepr(o) -> bytes:
 """Pretty print a representation of a possibly-nested object"""
 lines = []
 rs = pycompat.byterepr(o)
 p0 = p1 = 0
 while p0 < len(rs):
 lines.append((l, rs[p0:q0].rstrip()))
 p0, p1 = q0, q1
 return b'\n'.join(b'  ' * l + s for l, s in lines)
-def buildrepr(r):
+def buildrepr(r) -> bytes:
 """Format an optional printable representation from unexpanded bits
 ========  =================================
 type(r)   example
 ========  =================================
 return r()
 else:
 return pprint(r)
-def binary(s):
+def binary(s: bytes) -> bool:
 """return true if a string is binary data"""
 return bool(s and b'\0' in s)
-def _splitpattern(pattern):
+def _splitpattern(pattern: bytes):
 if pattern.startswith(b're:'):
 return b're', pattern[3:]
 elif pattern.startswith(b'literal:'):
 return b'literal', pattern[8:]
 return b'literal', pattern
-def stringmatcher(pattern, casesensitive=True):
+def stringmatcher(pattern: bytes, casesensitive: bool = True):
 """
 accepts a string, possibly starting with 're:' or 'literal:' prefix.
 returns the matcher name, pattern, and matcher function.
 missing or unknown prefixes are treated as literal matches.
 return kind, pattern, match
 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
-def substringregexp(pattern, flags=0):
+def substringregexp(pattern: bytes, flags: int = 0):
 """Build a regexp object from a string pattern possibly starting with
 're:' or 'literal:' prefix.
 helper for tests:
 >>> def test(pattern, *tests):
 return remod.compile(remod.escape(pattern), flags)
 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
-def shortuser(user):
+def shortuser(user: bytes) -> bytes:
 """Return a short representation of a user name or email address."""
 f = user.find(b'@')
 if f >= 0:
 user = user[:f]
 f = user.find(b'<')
 if f >= 0:
 user = user[:f]
 return user
-def emailuser(user):
+def emailuser(user: bytes) -> bytes:
 """Return the user portion of an email address."""
 f = user.find(b'@')
 if f >= 0:
 user = user[:f]
 f = user.find(b'<')
 if f >= 0:
 user = user[f + 1 :]
 return user
-def email(author):
+def email(author: bytes) -> bytes:
 '''get email of author.'''
 r = author.find(b'>')
 if r == -1:
 r = None
 return author[author.find(b'<') + 1 : r]
-def person(author):
+def person(author: bytes) -> bytes:
 """Returns the name before an email address,
 interpreting it as per RFC 5322
 >>> person(b'foo@bar')
 'foo'
 )
 return mailmap
-def mapname(mailmap, author):
+def mapname(mailmap, author: bytes) -> bytes:
 """Returns the author field according to the mailmap cache, or
 the original author field.
 >>> mmdata = b"\\n".join([
 ...     b'# Comment',
 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
-def isauthorwellformed(author):
+def isauthorwellformed(author: bytes) -> bool:
 """Return True if the author field is well formed
 (i.e. "Contributor Name <contrib@email.dom>")
 >>> isauthorwellformed(b'Good Author <good@author.com>')
 True
 False
 """
 return _correctauthorformat.match(author) is not None
-def firstline(text):
+def firstline(text: bytes) -> bytes:
 """Return the first line of the input"""
 # Try to avoid running splitlines() on the whole string
 i = text.find(b'\n')
 if i != -1:
 text = text[:i]
 return text.splitlines()[0]
 except IndexError:
 return b''
-def ellipsis(text, maxlength=400):
+def ellipsis(text: bytes, maxlength: int = 400) -> bytes:
 """Trim string to at most maxlength (default: 400) columns in display."""
 return encoding.trim(text, maxlength, ellipsis=b'...')
-def escapestr(s):
+def escapestr(s: bytes) -> bytes:
+# "bytes" is also a typing shortcut for bytes, bytearray, and memoryview
 if isinstance(s, memoryview):
 s = bytes(s)
 # call underlying function of s.encode('string_escape') directly for
 # Python 3 compatibility
 return codecs.escape_encode(s)[0]  # pytype: disable=module-attr
-def unescapestr(s):
+def unescapestr(s: bytes) -> bytes:
 return codecs.escape_decode(s)[0]  # pytype: disable=module-attr
 def forcebytestr(obj):
 """Portably format an arbitrary object (e.g. exception) into a byte
 except UnicodeEncodeError:
 # non-ascii string, may be lossy
 return pycompat.bytestr(encoding.strtolocal(str(obj)))
-def uirepr(s):
+def uirepr(s: bytes) -> bytes:
 # Avoid double backslash in Windows path repr()
 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
 # delay import of textwrap
 global _MBTextWrapper
 _MBTextWrapper = tw
 return tw(**kwargs)
-def wrap(line, width, initindent=b'', hangindent=b''):
+def wrap(
+line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b''
+) -> bytes:
 maxindent = max(len(hangindent), len(initindent))
 if width <= maxindent:
 # adjust for weird terminal size
 width = max(78, maxindent + 1)
 line = line.decode(
 b'off': False,
 b'never': False,
 }
-def parsebool(s):
+def parsebool(s: bytes) -> Optional[bool]:
 """Parse s into a boolean.
 If s is not a valid boolean, returns None.
 """
 return _booleans.get(s.lower(), None)
-def parselist(value):
+# TODO: make arg mandatory (and fix code below?)
+def parselist(value: Optional[bytes]):
 """parse a configuration value as a list of comma/space separated strings
 >>> parselist(b'this,is "a small" ,test')
 ['this', 'is', 'a small', 'test']
 """
 else:
 result = value
 return result or []
-def evalpythonliteral(s):
+def evalpythonliteral(s: bytes):
 """Evaluate a string containing a Python literal expression"""
 # We could backport our tokenizer hack to rewrite '' to u'' if we want
 return ast.literal_eval(s.decode('latin1'))

changeset 49575	bbbb5213d043
parent 49284	d44e3c45f0e4
child 49648	9be765b82a90