1564 "filter a string through a command that transforms its input to its output" |
1551 "filter a string through a command that transforms its input to its output" |
1565 for name, fn in filtertable.iteritems(): |
1552 for name, fn in filtertable.iteritems(): |
1566 if cmd.startswith(name): |
1553 if cmd.startswith(name): |
1567 return fn(s, cmd[len(name):].lstrip()) |
1554 return fn(s, cmd[len(name):].lstrip()) |
1568 return pipefilter(s, cmd) |
1555 return pipefilter(s, cmd) |
1569 |
|
def binary(s):
    """Return True if ``s`` is non-empty and contains a NUL character.

    ``bytes`` and ``bytearray`` values are searched for a NUL byte; any
    other string type is searched for a NUL code point.  ``None`` and empty
    strings are never considered binary.
    """
    if not s:
        return False
    # The needle must be of the same kind as the haystack: on Python 3 a
    # membership test that mixes text and bytes raises TypeError.
    nul = b'\0' if isinstance(s, (bytes, bytearray)) else u'\0'
    return nul in s
|
1573 |
1556 |
1574 def increasingchunks(source, min=1024, max=65536): |
1557 def increasingchunks(source, min=1024, max=65536): |
1575 '''return no less than min bytes per chunk while data remains, |
1558 '''return no less than min bytes per chunk while data remains, |
1576 doubling min after each chunk until it reaches max''' |
1559 doubling min after each chunk until it reaches max''' |
1577 def log2(x): |
1560 def log2(x): |
2569 return None |
2552 return None |
2570 |
2553 |
2571 b[0:len(res)] = res |
2554 b[0:len(res)] = res |
2572 return len(res) |
2555 return len(res) |
2573 |
2556 |
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    The returned pattern has a recognized prefix stripped.  For 're' the
    matcher is the compiled regex's ``search`` method; for 'literal' it is
    an equality test against the whole string.  With ``casesensitive=False``
    the regex is compiled with the ignore-case flag and literals are
    compared after ``encoding.lower()``.

    Raises ``error.ParseError`` if a 're:' pattern does not compile.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        try:
            flags = 0
            if not casesensitive:
                flags = remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            # Format the exception through forcebytestr() rather than
            # interpolating the exception object directly, so the message
            # can be built when the format string is a byte string.
            raise error.ParseError(_('invalid regular expression: %s')
                                   % forcebytestr(e))
        return 're', pattern, regex.search
    elif pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        # Use the == operator instead of the bound pattern.__eq__: the raw
        # method returns NotImplemented (which is truthy) when handed an
        # operand of another type, which would be reported as a match.
        match = lambda s: pattern == s
    else:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
|
2632 |
|
def shortuser(user):
    """Return a short representation of a user name or email address.

    The result is what remains after, in order: cutting at the first '@',
    discarding everything up to and including the first '<', then cutting
    at the first space and at the first '.'.
    """
    # Text before the first '@' (the whole string when there is none).
    user = user.split('@', 1)[0]
    # Text after the first '<' (the whole string when there is none).
    user = user.split('<', 1)[-1]
    # Keep only the leading word, then only the part before its first dot.
    for sep in (' ', '.'):
        user = user.split(sep, 1)[0]
    return user
|
2648 |
|
def emailuser(user):
    """Return the user portion of an email address.

    That is the text before the first '@', with anything up to and
    including the first '<' in that text removed.
    """
    before_at = user.split('@', 1)[0]
    return before_at.split('<', 1)[-1]
|
2658 |
|
def email(author):
    '''get email of author.

    Returns the text between the first '<' and the first '>'.  A missing
    '<' means "start at the beginning"; a missing '>' means "run to the
    end of the string".
    '''
    # find() yields -1 when '<' is absent, so adding 1 gives offset 0.
    start = author.find('<') + 1
    end = author.find('>')
    if end < 0:
        return author[start:]
    return author[start:end]
|
2665 |
|
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    Delegates to ``encoding.trim`` with '...' as the ellipsis marker.
    """
    trimmed = encoding.trim(text, maxlength, ellipsis='...')
    return trimmed
|
2669 |
|
2670 def unitcountfn(*unittable): |
2557 def unitcountfn(*unittable): |
2671 '''return a function that renders a readable count of some quantity''' |
2558 '''return a function that renders a readable count of some quantity''' |
2672 |
2559 |
2673 def go(count): |
2560 def go(count): |
2674 for multiplier, divisor, format in unittable: |
2561 for multiplier, divisor, format in unittable: |
2748 nativeeolwriter = _crlfwriter |
2635 nativeeolwriter = _crlfwriter |
2749 else: |
2636 else: |
2750 tonativeeol = pycompat.identity |
2637 tonativeeol = pycompat.identity |
2751 fromnativeeol = pycompat.identity |
2638 fromnativeeol = pycompat.identity |
2752 nativeeolwriter = pycompat.identity |
2639 nativeeolwriter = pycompat.identity |
2753 |
|
def escapestr(s):
    """Return ``s`` with backslash escapes applied by codecs.escape_encode."""
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    escaped, consumed = codecs.escape_encode(s)
    return escaped
|
2758 |
|
def unescapestr(s):
    """Return ``s`` with backslash escapes decoded by codecs.escape_decode."""
    unescaped, consumed = codecs.escape_decode(s)
    return unescaped
|
2761 |
|
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    ``pycompat.bytestr(obj)`` is tried first; if that raises
    UnicodeEncodeError, ``str(obj)`` is passed through
    ``encoding.strtolocal`` before converting.
    """
    try:
        result = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        localized = encoding.strtolocal(str(obj))
        result = pycompat.bytestr(localized)
    return result
|
2770 |
|
def uirepr(s):
    """Return the byte repr of ``s`` with doubled backslashes collapsed."""
    rendered = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return rendered.replace(b'\\\\', b'\\')
|
2774 |
|
# delay import of textwrap
def _MBTextWrapper(**kwargs):
    """Build a column-width-aware TextWrapper from ``kwargs``.

    The first call defines the ``tw`` subclass below, rebinds the
    module-level name ``_MBTextWrapper`` to that class, and returns an
    instance.  Later lookups of ``_MBTextWrapper`` therefore reach the class
    itself, so the class body is only executed once.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # Split ucstr into (head, tail): head is the longest prefix
            # whose accumulated encoding.ucolwidth() total does not exceed
            # space_left.
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            # The whole string fits.
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # Always allow at least one column so progress is made.
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                # Put as much of the long chunk as fits on this line and
                # leave the remainder on the stack for the next line.
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                # Not allowed to break the word: give it a line of its own.
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    # Display width of the next chunk, not its len().
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # Replace this factory with the class so the definition above is not
    # repeated on later calls.
    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
|
2878 |
|
def wrap(line, width, initindent='', hangindent=''):
    """Fill ``line`` to ``width`` columns and return the encoded result.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones.  All three strings are decoded with
    ``encoding.encoding`` / ``encoding.encodingmode``, filled with
    ``_MBTextWrapper``, and the result is encoded back with
    ``encoding.encoding``.
    """
    widest = max(len(hangindent), len(initindent))
    if width <= widest:
        # adjust for weird terminal size
        width = max(78, widest + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errmode = pycompat.sysstr(encoding.encodingmode)
    line, initindent, hangindent = [
        text.decode(codec, errmode)
        for text in (line, initindent, hangindent)
    ]

    wrapper = _MBTextWrapper(width=width,
                             initial_indent=initindent,
                             subsequent_indent=hangindent)
    filled = wrapper.fill(line)
    return filled.encode(codec)
|
2894 |
2640 |
2895 if (pyplatform.python_implementation() == 'CPython' and |
2641 if (pyplatform.python_implementation() == 'CPython' and |
2896 sys.version_info < (3, 0)): |
2642 sys.version_info < (3, 0)): |
2897 # There is an issue in CPython that some IO methods do not handle EINTR |
2643 # There is an issue in CPython that some IO methods do not handle EINTR |
2898 # correctly. The following table shows what CPython version (and functions) |
2644 # correctly. The following table shows what CPython version (and functions) |
4339 shortdate = _deprecatedfunc(dateutil.shortdate, '4.6') |
4074 shortdate = _deprecatedfunc(dateutil.shortdate, '4.6') |
4340 parsetimezone = _deprecatedfunc(dateutil.parsetimezone, '4.6') |
4075 parsetimezone = _deprecatedfunc(dateutil.parsetimezone, '4.6') |
4341 strdate = _deprecatedfunc(dateutil.strdate, '4.6') |
4076 strdate = _deprecatedfunc(dateutil.strdate, '4.6') |
4342 parsedate = _deprecatedfunc(dateutil.parsedate, '4.6') |
4077 parsedate = _deprecatedfunc(dateutil.parsedate, '4.6') |
4343 matchdate = _deprecatedfunc(dateutil.matchdate, '4.6') |
4078 matchdate = _deprecatedfunc(dateutil.matchdate, '4.6') |
|
4079 |
|
def _deprecatedfunc(func, version): # TODO
    """Return ``func`` unchanged.

    ``version`` is accepted but not used by this implementation; the TODO
    marker on the signature indicates the body is a placeholder.
    """
    return func
|
# Module-level aliases: each name is bound to the result of passing the
# same-named stringutil function, together with the version string '4.6',
# to _deprecatedfunc().
escapedata = _deprecatedfunc(stringutil.escapedata, '4.6')
binary = _deprecatedfunc(stringutil.binary, '4.6')
stringmatcher = _deprecatedfunc(stringutil.stringmatcher, '4.6')
shortuser = _deprecatedfunc(stringutil.shortuser, '4.6')
emailuser = _deprecatedfunc(stringutil.emailuser, '4.6')
email = _deprecatedfunc(stringutil.email, '4.6')
ellipsis = _deprecatedfunc(stringutil.ellipsis, '4.6')
escapestr = _deprecatedfunc(stringutil.escapestr, '4.6')
unescapestr = _deprecatedfunc(stringutil.unescapestr, '4.6')
forcebytestr = _deprecatedfunc(stringutil.forcebytestr, '4.6')
uirepr = _deprecatedfunc(stringutil.uirepr, '4.6')
wrap = _deprecatedfunc(stringutil.wrap, '4.6')
parsebool = _deprecatedfunc(stringutil.parsebool, '4.6')