mercurial/util.py
changeset 37083 f99d64e8a4e4
parent 37082 1a1d1c44b570
child 37084 f0b6fbea00cf
equal deleted inserted replaced
37082:1a1d1c44b570 37083:f99d64e8a4e4
    15 
    15 
    16 from __future__ import absolute_import, print_function
    16 from __future__ import absolute_import, print_function
    17 
    17 
    18 import abc
    18 import abc
    19 import bz2
    19 import bz2
    20 import codecs
       
    21 import collections
    20 import collections
    22 import contextlib
    21 import contextlib
    23 import errno
    22 import errno
    24 import gc
    23 import gc
    25 import hashlib
    24 import hashlib
    35 import socket
    34 import socket
    36 import stat
    35 import stat
    37 import subprocess
    36 import subprocess
    38 import sys
    37 import sys
    39 import tempfile
    38 import tempfile
    40 import textwrap
       
    41 import time
    39 import time
    42 import traceback
    40 import traceback
    43 import warnings
    41 import warnings
    44 import zlib
    42 import zlib
    45 
    43 
    50     node as nodemod,
    48     node as nodemod,
    51     policy,
    49     policy,
    52     pycompat,
    50     pycompat,
    53     urllibcompat,
    51     urllibcompat,
    54 )
    52 )
    55 from .utils import dateutil
    53 from .utils import (
       
    54     dateutil,
       
    55     stringutil,
       
    56 )
    56 
    57 
    57 base85 = policy.importmod(r'base85')
    58 base85 = policy.importmod(r'base85')
    58 osutil = policy.importmod(r'osutil')
    59 osutil = policy.importmod(r'osutil')
    59 parsers = policy.importmod(r'parsers')
    60 parsers = policy.importmod(r'parsers')
    60 
    61 
   806 
   807 
   807     def setsockopt(self, *args, **kwargs):
   808     def setsockopt(self, *args, **kwargs):
   808         return object.__getattribute__(self, r'_observedcall')(
   809         return object.__getattribute__(self, r'_observedcall')(
   809             r'setsockopt', *args, **kwargs)
   810             r'setsockopt', *args, **kwargs)
   810 
   811 
   811 _DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
       
   812 _DATA_ESCAPE_MAP.update({
       
   813     b'\\': b'\\\\',
       
   814     b'\r': br'\r',
       
   815     b'\n': br'\n',
       
   816 })
       
   817 _DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
       
   818 
       
   819 def escapedata(s):
       
   820     if isinstance(s, bytearray):
       
   821         s = bytes(s)
       
   822 
       
   823     return _DATA_ESCAPE_RE.sub(lambda m: _DATA_ESCAPE_MAP[m.group(0)], s)
       
   824 
       
   825 class baseproxyobserver(object):
   812 class baseproxyobserver(object):
   826     def _writedata(self, data):
   813     def _writedata(self, data):
   827         if not self.logdata:
   814         if not self.logdata:
   828             if self.logdataapis:
   815             if self.logdataapis:
   829                 self.fh.write('\n')
   816                 self.fh.write('\n')
  1564     "filter a string through a command that transforms its input to its output"
  1551     "filter a string through a command that transforms its input to its output"
  1565     for name, fn in filtertable.iteritems():
  1552     for name, fn in filtertable.iteritems():
  1566         if cmd.startswith(name):
  1553         if cmd.startswith(name):
  1567             return fn(s, cmd[len(name):].lstrip())
  1554             return fn(s, cmd[len(name):].lstrip())
  1568     return pipefilter(s, cmd)
  1555     return pipefilter(s, cmd)
  1569 
       
  1570 def binary(s):
       
  1571     """return true if a string is binary data"""
       
  1572     return bool(s and '\0' in s)
       
  1573 
  1556 
  1574 def increasingchunks(source, min=1024, max=65536):
  1557 def increasingchunks(source, min=1024, max=65536):
  1575     '''return no less than min bytes per chunk while data remains,
  1558     '''return no less than min bytes per chunk while data remains,
  1576     doubling min after each chunk until it reaches max'''
  1559     doubling min after each chunk until it reaches max'''
  1577     def log2(x):
  1560     def log2(x):
  2569             return None
  2552             return None
  2570 
  2553 
  2571         b[0:len(res)] = res
  2554         b[0:len(res)] = res
  2572         return len(res)
  2555         return len(res)
  2573 
  2556 
  2574 def stringmatcher(pattern, casesensitive=True):
       
  2575     """
       
  2576     accepts a string, possibly starting with 're:' or 'literal:' prefix.
       
  2577     returns the matcher name, pattern, and matcher function.
       
  2578     missing or unknown prefixes are treated as literal matches.
       
  2579 
       
  2580     helper for tests:
       
  2581     >>> def test(pattern, *tests):
       
  2582     ...     kind, pattern, matcher = stringmatcher(pattern)
       
  2583     ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
       
  2584     >>> def itest(pattern, *tests):
       
  2585     ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
       
  2586     ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
       
  2587 
       
  2588     exact matching (no prefix):
       
  2589     >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
       
  2590     ('literal', 'abcdefg', [False, False, True])
       
  2591 
       
  2592     regex matching ('re:' prefix)
       
  2593     >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
       
  2594     ('re', 'a.+b', [False, False, True])
       
  2595 
       
  2596     force exact matches ('literal:' prefix)
       
  2597     >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
       
  2598     ('literal', 're:foobar', [False, True])
       
  2599 
       
  2600     unknown prefixes are ignored and treated as literals
       
  2601     >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
       
  2602     ('literal', 'foo:bar', [False, False, True])
       
  2603 
       
  2604     case insensitive regex matches
       
  2605     >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
       
  2606     ('re', 'A.+b', [False, False, True])
       
  2607 
       
  2608     case insensitive literal matches
       
  2609     >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
       
  2610     ('literal', 'ABCDEFG', [False, False, True])
       
  2611     """
       
  2612     if pattern.startswith('re:'):
       
  2613         pattern = pattern[3:]
       
  2614         try:
       
  2615             flags = 0
       
  2616             if not casesensitive:
       
  2617                 flags = remod.I
       
  2618             regex = remod.compile(pattern, flags)
       
  2619         except remod.error as e:
       
  2620             raise error.ParseError(_('invalid regular expression: %s')
       
  2621                                    % e)
       
  2622         return 're', pattern, regex.search
       
  2623     elif pattern.startswith('literal:'):
       
  2624         pattern = pattern[8:]
       
  2625 
       
  2626     match = pattern.__eq__
       
  2627 
       
  2628     if not casesensitive:
       
  2629         ipat = encoding.lower(pattern)
       
  2630         match = lambda s: ipat == encoding.lower(s)
       
  2631     return 'literal', pattern, match
       
  2632 
       
  2633 def shortuser(user):
       
  2634     """Return a short representation of a user name or email address."""
       
  2635     f = user.find('@')
       
  2636     if f >= 0:
       
  2637         user = user[:f]
       
  2638     f = user.find('<')
       
  2639     if f >= 0:
       
  2640         user = user[f + 1:]
       
  2641     f = user.find(' ')
       
  2642     if f >= 0:
       
  2643         user = user[:f]
       
  2644     f = user.find('.')
       
  2645     if f >= 0:
       
  2646         user = user[:f]
       
  2647     return user
       
  2648 
       
  2649 def emailuser(user):
       
  2650     """Return the user portion of an email address."""
       
  2651     f = user.find('@')
       
  2652     if f >= 0:
       
  2653         user = user[:f]
       
  2654     f = user.find('<')
       
  2655     if f >= 0:
       
  2656         user = user[f + 1:]
       
  2657     return user
       
  2658 
       
  2659 def email(author):
       
  2660     '''get email of author.'''
       
  2661     r = author.find('>')
       
  2662     if r == -1:
       
  2663         r = None
       
  2664     return author[author.find('<') + 1:r]
       
  2665 
       
  2666 def ellipsis(text, maxlength=400):
       
  2667     """Trim string to at most maxlength (default: 400) columns in display."""
       
  2668     return encoding.trim(text, maxlength, ellipsis='...')
       
  2669 
       
  2670 def unitcountfn(*unittable):
  2557 def unitcountfn(*unittable):
  2671     '''return a function that renders a readable count of some quantity'''
  2558     '''return a function that renders a readable count of some quantity'''
  2672 
  2559 
  2673     def go(count):
  2560     def go(count):
  2674         for multiplier, divisor, format in unittable:
  2561         for multiplier, divisor, format in unittable:
  2748     nativeeolwriter = _crlfwriter
  2635     nativeeolwriter = _crlfwriter
  2749 else:
  2636 else:
  2750     tonativeeol = pycompat.identity
  2637     tonativeeol = pycompat.identity
  2751     fromnativeeol = pycompat.identity
  2638     fromnativeeol = pycompat.identity
  2752     nativeeolwriter = pycompat.identity
  2639     nativeeolwriter = pycompat.identity
  2753 
       
  2754 def escapestr(s):
       
  2755     # call underlying function of s.encode('string_escape') directly for
       
  2756     # Python 3 compatibility
       
  2757     return codecs.escape_encode(s)[0]
       
  2758 
       
  2759 def unescapestr(s):
       
  2760     return codecs.escape_decode(s)[0]
       
  2761 
       
  2762 def forcebytestr(obj):
       
  2763     """Portably format an arbitrary object (e.g. exception) into a byte
       
  2764     string."""
       
  2765     try:
       
  2766         return pycompat.bytestr(obj)
       
  2767     except UnicodeEncodeError:
       
  2768         # non-ascii string, may be lossy
       
  2769         return pycompat.bytestr(encoding.strtolocal(str(obj)))
       
  2770 
       
  2771 def uirepr(s):
       
  2772     # Avoid double backslash in Windows path repr()
       
  2773     return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
       
  2774 
       
  2775 # delay import of textwrap
       
  2776 def _MBTextWrapper(**kwargs):
       
  2777     class tw(textwrap.TextWrapper):
       
  2778         """
       
  2779         Extend TextWrapper for width-awareness.
       
  2780 
       
  2781         Neither number of 'bytes' in any encoding nor 'characters' is
       
  2782         appropriate to calculate terminal columns for specified string.
       
  2783 
       
  2784         Original TextWrapper implementation uses built-in 'len()' directly,
       
  2785         so overriding is needed to use width information of each characters.
       
  2786 
       
  2787         In addition, characters classified into 'ambiguous' width are
       
  2788         treated as wide in East Asian area, but as narrow in other.
       
  2789 
       
  2790         This requires use decision to determine width of such characters.
       
  2791         """
       
  2792         def _cutdown(self, ucstr, space_left):
       
  2793             l = 0
       
  2794             colwidth = encoding.ucolwidth
       
  2795             for i in xrange(len(ucstr)):
       
  2796                 l += colwidth(ucstr[i])
       
  2797                 if space_left < l:
       
  2798                     return (ucstr[:i], ucstr[i:])
       
  2799             return ucstr, ''
       
  2800 
       
  2801         # overriding of base class
       
  2802         def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
       
  2803             space_left = max(width - cur_len, 1)
       
  2804 
       
  2805             if self.break_long_words:
       
  2806                 cut, res = self._cutdown(reversed_chunks[-1], space_left)
       
  2807                 cur_line.append(cut)
       
  2808                 reversed_chunks[-1] = res
       
  2809             elif not cur_line:
       
  2810                 cur_line.append(reversed_chunks.pop())
       
  2811 
       
  2812         # this overriding code is imported from TextWrapper of Python 2.6
       
  2813         # to calculate columns of string by 'encoding.ucolwidth()'
       
  2814         def _wrap_chunks(self, chunks):
       
  2815             colwidth = encoding.ucolwidth
       
  2816 
       
  2817             lines = []
       
  2818             if self.width <= 0:
       
  2819                 raise ValueError("invalid width %r (must be > 0)" % self.width)
       
  2820 
       
  2821             # Arrange in reverse order so items can be efficiently popped
       
  2822             # from a stack of chucks.
       
  2823             chunks.reverse()
       
  2824 
       
  2825             while chunks:
       
  2826 
       
  2827                 # Start the list of chunks that will make up the current line.
       
  2828                 # cur_len is just the length of all the chunks in cur_line.
       
  2829                 cur_line = []
       
  2830                 cur_len = 0
       
  2831 
       
  2832                 # Figure out which static string will prefix this line.
       
  2833                 if lines:
       
  2834                     indent = self.subsequent_indent
       
  2835                 else:
       
  2836                     indent = self.initial_indent
       
  2837 
       
  2838                 # Maximum width for this line.
       
  2839                 width = self.width - len(indent)
       
  2840 
       
  2841                 # First chunk on line is whitespace -- drop it, unless this
       
  2842                 # is the very beginning of the text (i.e. no lines started yet).
       
  2843                 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
       
  2844                     del chunks[-1]
       
  2845 
       
  2846                 while chunks:
       
  2847                     l = colwidth(chunks[-1])
       
  2848 
       
  2849                     # Can at least squeeze this chunk onto the current line.
       
  2850                     if cur_len + l <= width:
       
  2851                         cur_line.append(chunks.pop())
       
  2852                         cur_len += l
       
  2853 
       
  2854                     # Nope, this line is full.
       
  2855                     else:
       
  2856                         break
       
  2857 
       
  2858                 # The current line is full, and the next chunk is too big to
       
  2859                 # fit on *any* line (not just this one).
       
  2860                 if chunks and colwidth(chunks[-1]) > width:
       
  2861                     self._handle_long_word(chunks, cur_line, cur_len, width)
       
  2862 
       
  2863                 # If the last chunk on this line is all whitespace, drop it.
       
  2864                 if (self.drop_whitespace and
       
  2865                     cur_line and cur_line[-1].strip() == r''):
       
  2866                     del cur_line[-1]
       
  2867 
       
  2868                 # Convert current line back to a string and store it in list
       
  2869                 # of all lines (return value).
       
  2870                 if cur_line:
       
  2871                     lines.append(indent + r''.join(cur_line))
       
  2872 
       
  2873             return lines
       
  2874 
       
  2875     global _MBTextWrapper
       
  2876     _MBTextWrapper = tw
       
  2877     return tw(**kwargs)
       
  2878 
       
  2879 def wrap(line, width, initindent='', hangindent=''):
       
  2880     maxindent = max(len(hangindent), len(initindent))
       
  2881     if width <= maxindent:
       
  2882         # adjust for weird terminal size
       
  2883         width = max(78, maxindent + 1)
       
  2884     line = line.decode(pycompat.sysstr(encoding.encoding),
       
  2885                        pycompat.sysstr(encoding.encodingmode))
       
  2886     initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
       
  2887                                    pycompat.sysstr(encoding.encodingmode))
       
  2888     hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
       
  2889                                    pycompat.sysstr(encoding.encodingmode))
       
  2890     wrapper = _MBTextWrapper(width=width,
       
  2891                              initial_indent=initindent,
       
  2892                              subsequent_indent=hangindent)
       
  2893     return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
       
  2894 
  2640 
  2895 if (pyplatform.python_implementation() == 'CPython' and
  2641 if (pyplatform.python_implementation() == 'CPython' and
  2896     sys.version_info < (3, 0)):
  2642     sys.version_info < (3, 0)):
  2897     # There is an issue in CPython that some IO methods do not handle EINTR
  2643     # There is an issue in CPython that some IO methods do not handle EINTR
  2898     # correctly. The following table shows what CPython version (and functions)
  2644     # correctly. The following table shows what CPython version (and functions)
  3062     try:
  2808     try:
  3063         return socket.getservbyname(pycompat.sysstr(port))
  2809         return socket.getservbyname(pycompat.sysstr(port))
  3064     except socket.error:
  2810     except socket.error:
  3065         raise Abort(_("no port number associated with service '%s'") % port)
  2811         raise Abort(_("no port number associated with service '%s'") % port)
  3066 
  2812 
  3067 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
       
  3068              '0': False, 'no': False, 'false': False, 'off': False,
       
  3069              'never': False}
       
  3070 
       
  3071 def parsebool(s):
       
  3072     """Parse s into a boolean.
       
  3073 
       
  3074     If s is not a valid boolean, returns None.
       
  3075     """
       
  3076     return _booleans.get(s.lower(), None)
       
  3077 
       
  3078 class url(object):
  2813 class url(object):
  3079     r"""Reliable URL parser.
  2814     r"""Reliable URL parser.
  3080 
  2815 
  3081     This parses URLs and provides attributes for the following
  2816     This parses URLs and provides attributes for the following
  3082     components:
  2817     components:
  4339 shortdate = _deprecatedfunc(dateutil.shortdate, '4.6')
  4074 shortdate = _deprecatedfunc(dateutil.shortdate, '4.6')
  4340 parsetimezone = _deprecatedfunc(dateutil.parsetimezone, '4.6')
  4075 parsetimezone = _deprecatedfunc(dateutil.parsetimezone, '4.6')
  4341 strdate = _deprecatedfunc(dateutil.strdate, '4.6')
  4076 strdate = _deprecatedfunc(dateutil.strdate, '4.6')
  4342 parsedate = _deprecatedfunc(dateutil.parsedate, '4.6')
  4077 parsedate = _deprecatedfunc(dateutil.parsedate, '4.6')
  4343 matchdate = _deprecatedfunc(dateutil.matchdate, '4.6')
  4078 matchdate = _deprecatedfunc(dateutil.matchdate, '4.6')
       
  4079 
       
  4080 def _deprecatedfunc(func, version):  # TODO
       
  4081     return func
       
  4082 escapedata = _deprecatedfunc(stringutil.escapedata, '4.6')
       
  4083 binary = _deprecatedfunc(stringutil.binary, '4.6')
       
  4084 stringmatcher = _deprecatedfunc(stringutil.stringmatcher, '4.6')
       
  4085 shortuser = _deprecatedfunc(stringutil.shortuser, '4.6')
       
  4086 emailuser = _deprecatedfunc(stringutil.emailuser, '4.6')
       
  4087 email = _deprecatedfunc(stringutil.email, '4.6')
       
  4088 ellipsis = _deprecatedfunc(stringutil.ellipsis, '4.6')
       
  4089 escapestr = _deprecatedfunc(stringutil.escapestr, '4.6')
       
  4090 unescapestr = _deprecatedfunc(stringutil.unescapestr, '4.6')
       
  4091 forcebytestr = _deprecatedfunc(stringutil.forcebytestr, '4.6')
       
  4092 uirepr = _deprecatedfunc(stringutil.uirepr, '4.6')
       
  4093 wrap = _deprecatedfunc(stringutil.wrap, '4.6')
       
  4094 parsebool = _deprecatedfunc(stringutil.parsebool, '4.6')