26 # regex special chars pulled from https://bugs.python.org/issue29995 |
31 # regex special chars pulled from https://bugs.python.org/issue29995 |
27 # which was part of Python 3.7. |
32 # which was part of Python 3.7. |
28 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f') |
33 _respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f') |
29 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial} |
34 _regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial} |
30 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial} |
35 regexbytesescapemap = {i: (b'\\' + i) for i in _respecial} |
|
36 |
|
37 |
|
38 @overload |
|
39 def reescape(pat: bytes) -> bytes: |
|
40 ... |
|
41 |
|
42 |
|
43 @overload |
|
44 def reescape(pat: str) -> str: |
|
45 ... |
31 |
46 |
32 |
47 |
33 def reescape(pat): |
48 def reescape(pat): |
34 """Drop-in replacement for re.escape.""" |
49 """Drop-in replacement for re.escape.""" |
35 # NOTE: it is intentional that this works on unicodes and not |
50 # NOTE: it is intentional that this works on unicodes and not |
43 if wantuni: |
58 if wantuni: |
44 return pat |
59 return pat |
45 return pat.encode('latin1') |
60 return pat.encode('latin1') |
46 |
61 |
47 |
62 |
48 def pprint(o, bprefix=False, indent=0, level=0): |
63 def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes: |
49 """Pretty print an object.""" |
64 """Pretty print an object.""" |
50 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level)) |
65 return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level)) |
51 |
66 |
52 |
67 |
53 def pprintgen(o, bprefix=False, indent=0, level=0): |
68 def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0): |
54 """Pretty print an object to a generator of atoms. |
69 """Pretty print an object to a generator of atoms. |
55 |
70 |
56 ``bprefix`` is a flag influencing whether bytestrings are preferred with |
71 ``bprefix`` is a flag influencing whether bytestrings are preferred with |
57 a ``b''`` prefix. |
72 a ``b''`` prefix. |
58 |
73 |
279 lines.append((l, rs[p0:q0].rstrip())) |
294 lines.append((l, rs[p0:q0].rstrip())) |
280 p0, p1 = q0, q1 |
295 p0, p1 = q0, q1 |
281 return b'\n'.join(b' ' * l + s for l, s in lines) |
296 return b'\n'.join(b' ' * l + s for l, s in lines) |
282 |
297 |
283 |
298 |
284 def buildrepr(r): |
299 def buildrepr(r) -> bytes: |
285 """Format an optional printable representation from unexpanded bits |
300 """Format an optional printable representation from unexpanded bits |
286 |
301 |
287 ======== ================================= |
302 ======== ================================= |
288 type(r) example |
303 type(r) example |
289 ======== ================================= |
304 ======== ================================= |
303 return r() |
318 return r() |
304 else: |
319 else: |
305 return pprint(r) |
320 return pprint(r) |
306 |
321 |
307 |
322 |
308 def binary(s): |
323 def binary(s: bytes) -> bool: |
309 """return true if a string is binary data""" |
324 """return true if a string is binary data""" |
310 return bool(s and b'\0' in s) |
325 return bool(s and b'\0' in s) |
311 |
326 |
312 |
327 |
313 def _splitpattern(pattern): |
328 def _splitpattern(pattern: bytes): |
314 if pattern.startswith(b're:'): |
329 if pattern.startswith(b're:'): |
315 return b're', pattern[3:] |
330 return b're', pattern[3:] |
316 elif pattern.startswith(b'literal:'): |
331 elif pattern.startswith(b'literal:'): |
317 return b'literal', pattern[8:] |
332 return b'literal', pattern[8:] |
318 return b'literal', pattern |
333 return b'literal', pattern |
319 |
334 |
320 |
335 |
321 def stringmatcher(pattern, casesensitive=True): |
336 def stringmatcher(pattern: bytes, casesensitive: bool = True): |
322 """ |
337 """ |
323 accepts a string, possibly starting with 're:' or 'literal:' prefix. |
338 accepts a string, possibly starting with 're:' or 'literal:' prefix. |
324 returns the matcher name, pattern, and matcher function. |
339 returns the matcher name, pattern, and matcher function. |
325 missing or unknown prefixes are treated as literal matches. |
340 missing or unknown prefixes are treated as literal matches. |
326 |
341 |
377 return kind, pattern, match |
392 return kind, pattern, match |
378 |
393 |
379 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind) |
394 raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind) |
380 |
395 |
381 |
396 |
382 def substringregexp(pattern, flags=0): |
397 def substringregexp(pattern: bytes, flags: int = 0): |
383 """Build a regexp object from a string pattern possibly starting with |
398 """Build a regexp object from a string pattern possibly starting with |
384 're:' or 'literal:' prefix. |
399 're:' or 'literal:' prefix. |
385 |
400 |
386 helper for tests: |
401 helper for tests: |
387 >>> def test(pattern, *tests): |
402 >>> def test(pattern, *tests): |
446 if f >= 0: |
461 if f >= 0: |
447 user = user[:f] |
462 user = user[:f] |
448 return user |
463 return user |
449 |
464 |
450 |
465 |
451 def emailuser(user): |
466 def emailuser(user: bytes) -> bytes: |
452 """Return the user portion of an email address.""" |
467 """Return the user portion of an email address.""" |
453 f = user.find(b'@') |
468 f = user.find(b'@') |
454 if f >= 0: |
469 if f >= 0: |
455 user = user[:f] |
470 user = user[:f] |
456 f = user.find(b'<') |
471 f = user.find(b'<') |
457 if f >= 0: |
472 if f >= 0: |
458 user = user[f + 1 :] |
473 user = user[f + 1 :] |
459 return user |
474 return user |
460 |
475 |
461 |
476 |
462 def email(author): |
477 def email(author: bytes) -> bytes: |
463 '''get email of author.''' |
478 '''get email of author.''' |
464 r = author.find(b'>') |
479 r = author.find(b'>') |
465 if r == -1: |
480 if r == -1: |
466 r = None |
481 r = None |
467 return author[author.find(b'<') + 1 : r] |
482 return author[author.find(b'<') + 1 : r] |
468 |
483 |
469 |
484 |
470 def person(author): |
485 def person(author: bytes) -> bytes: |
471 """Returns the name before an email address, |
486 """Returns the name before an email address, |
472 interpreting it as per RFC 5322 |
487 interpreting it as per RFC 5322 |
473 |
488 |
474 >>> person(b'foo@bar') |
489 >>> person(b'foo@bar') |
475 'foo' |
490 'foo' |
610 ) |
625 ) |
611 |
626 |
612 return mailmap |
627 return mailmap |
613 |
628 |
614 |
629 |
615 def mapname(mailmap, author): |
630 def mapname(mailmap, author: bytes) -> bytes: |
616 """Returns the author field according to the mailmap cache, or |
631 """Returns the author field according to the mailmap cache, or |
617 the original author field. |
632 the original author field. |
618 |
633 |
619 >>> mmdata = b"\\n".join([ |
634 >>> mmdata = b"\\n".join([ |
620 ... b'# Comment', |
635 ... b'# Comment', |
661 |
676 |
662 |
677 |
663 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$') |
678 _correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$') |
664 |
679 |
665 |
680 |
666 def isauthorwellformed(author): |
681 def isauthorwellformed(author: bytes) -> bool: |
667 """Return True if the author field is well formed |
682 """Return True if the author field is well formed |
668 (ie "Contributor Name <contrib@email.dom>") |
683 (ie "Contributor Name <contrib@email.dom>") |
669 |
684 |
670 >>> isauthorwellformed(b'Good Author <good@author.com>') |
685 >>> isauthorwellformed(b'Good Author <good@author.com>') |
671 True |
686 True |
695 return text.splitlines()[0] |
710 return text.splitlines()[0] |
696 except IndexError: |
711 except IndexError: |
697 return b'' |
712 return b'' |
698 |
713 |
699 |
714 |
700 def ellipsis(text, maxlength=400): |
715 def ellipsis(text: bytes, maxlength: int = 400) -> bytes: |
701 """Trim string to at most maxlength (default: 400) columns in display.""" |
716 """Trim string to at most maxlength (default: 400) columns in display.""" |
702 return encoding.trim(text, maxlength, ellipsis=b'...') |
717 return encoding.trim(text, maxlength, ellipsis=b'...') |
703 |
718 |
704 |
719 |
705 def escapestr(s): |
720 def escapestr(s: bytes) -> bytes: |
|
721 # "bytes" is also a typing shortcut for bytes, bytearray, and memoryview |
706 if isinstance(s, memoryview): |
722 if isinstance(s, memoryview): |
707 s = bytes(s) |
723 s = bytes(s) |
708 # call underlying function of s.encode('string_escape') directly for |
724 # call underlying function of s.encode('string_escape') directly for |
709 # Python 3 compatibility |
725 # Python 3 compatibility |
710 return codecs.escape_encode(s)[0] # pytype: disable=module-attr |
726 return codecs.escape_encode(s)[0] # pytype: disable=module-attr |
711 |
727 |
712 |
728 |
713 def unescapestr(s): |
729 def unescapestr(s: bytes) -> bytes: |
714 return codecs.escape_decode(s)[0] # pytype: disable=module-attr |
730 return codecs.escape_decode(s)[0] # pytype: disable=module-attr |
715 |
731 |
716 |
732 |
717 def forcebytestr(obj): |
733 def forcebytestr(obj): |
718 """Portably format an arbitrary object (e.g. exception) into a byte |
734 """Portably format an arbitrary object (e.g. exception) into a byte |
722 except UnicodeEncodeError: |
738 except UnicodeEncodeError: |
723 # non-ascii string, may be lossy |
739 # non-ascii string, may be lossy |
724 return pycompat.bytestr(encoding.strtolocal(str(obj))) |
740 return pycompat.bytestr(encoding.strtolocal(str(obj))) |
725 |
741 |
726 |
742 |
727 def uirepr(s): |
743 def uirepr(s: bytes) -> bytes: |
728 # Avoid double backslash in Windows path repr() |
744 # Avoid double backslash in Windows path repr() |
729 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\') |
745 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\') |
730 |
746 |
731 |
747 |
732 # delay import of textwrap |
748 # delay import of textwrap |
836 global _MBTextWrapper |
852 global _MBTextWrapper |
837 _MBTextWrapper = tw |
853 _MBTextWrapper = tw |
838 return tw(**kwargs) |
854 return tw(**kwargs) |
839 |
855 |
840 |
856 |
841 def wrap(line, width, initindent=b'', hangindent=b''): |
857 def wrap( |
|
858 line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b'' |
|
859 ) -> bytes: |
842 maxindent = max(len(hangindent), len(initindent)) |
860 maxindent = max(len(hangindent), len(initindent)) |
843 if width <= maxindent: |
861 if width <= maxindent: |
844 # adjust for weird terminal size |
862 # adjust for weird terminal size |
845 width = max(78, maxindent + 1) |
863 width = max(78, maxindent + 1) |
846 line = line.decode( |
864 line = line.decode( |
873 b'off': False, |
891 b'off': False, |
874 b'never': False, |
892 b'never': False, |
875 } |
893 } |
876 |
894 |
877 |
895 |
878 def parsebool(s): |
896 def parsebool(s: bytes) -> Optional[bool]: |
879 """Parse s into a boolean. |
897 """Parse s into a boolean. |
880 |
898 |
881 If s is not a valid boolean, returns None. |
899 If s is not a valid boolean, returns None. |
882 """ |
900 """ |
883 return _booleans.get(s.lower(), None) |
901 return _booleans.get(s.lower(), None) |
884 |
902 |
885 |
903 |
886 def parselist(value): |
904 # TODO: make arg mandatory (and fix code below?) |
|
905 def parselist(value: Optional[bytes]): |
887 """parse a configuration value as a list of comma/space separated strings |
906 """parse a configuration value as a list of comma/space separated strings |
888 |
907 |
889 >>> parselist(b'this,is "a small" ,test') |
908 >>> parselist(b'this,is "a small" ,test') |
890 ['this', 'is', 'a small', 'test'] |
909 ['this', 'is', 'a small', 'test'] |
891 """ |
910 """ |