mercurial/encoding.py
changeset 41836 25694a78e4a4
parent 39839 9e8fcd2e78c1
child 43076 2372284d9457
Legend: equal | deleted | inserted | replaced
41835:ddb174511f1b 41836:25694a78e4a4
    63 elif _nativeenviron:
    63 elif _nativeenviron:
    64     environ = os.environb  # re-exports
    64     environ = os.environb  # re-exports
    65 else:
    65 else:
    66     # preferred encoding isn't known yet; use utf-8 to avoid unicode error
    66     # preferred encoding isn't known yet; use utf-8 to avoid unicode error
    67     # and recreate it once encoding is settled
    67     # and recreate it once encoding is settled
    68     environ = dict((k.encode(u'utf-8'), v.encode(u'utf-8'))
    68     environ = dict((k.encode(r'utf-8'), v.encode(r'utf-8'))
    69                    for k, v in os.environ.items())  # re-exports
    69                    for k, v in os.environ.items())  # re-exports
    70 
    70 
    71 _encodingrewrites = {
    71 _encodingrewrites = {
    72     '646': 'ascii',
    72     '646': 'ascii',
    73     'ANSI_X3.4-1968': 'ascii',
    73     'ANSI_X3.4-1968': 'ascii',
   150             # make sure string is actually stored in UTF-8
   150             # make sure string is actually stored in UTF-8
   151             u = s.decode('UTF-8')
   151             u = s.decode('UTF-8')
   152             if encoding == 'UTF-8':
   152             if encoding == 'UTF-8':
   153                 # fast path
   153                 # fast path
   154                 return s
   154                 return s
   155             r = u.encode(_sysstr(encoding), u"replace")
   155             r = u.encode(_sysstr(encoding), r"replace")
   156             if u == r.decode(_sysstr(encoding)):
   156             if u == r.decode(_sysstr(encoding)):
   157                 # r is a safe, non-lossy encoding of s
   157                 # r is a safe, non-lossy encoding of s
   158                 return safelocalstr(r)
   158                 return safelocalstr(r)
   159             return localstr(s, r)
   159             return localstr(s, r)
   160         except UnicodeDecodeError:
   160         except UnicodeDecodeError:
   161             # we should only get here if we're looking at an ancient changeset
   161             # we should only get here if we're looking at an ancient changeset
   162             try:
   162             try:
   163                 u = s.decode(_sysstr(fallbackencoding))
   163                 u = s.decode(_sysstr(fallbackencoding))
   164                 r = u.encode(_sysstr(encoding), u"replace")
   164                 r = u.encode(_sysstr(encoding), r"replace")
   165                 if u == r.decode(_sysstr(encoding)):
   165                 if u == r.decode(_sysstr(encoding)):
   166                     # r is a safe, non-lossy encoding of s
   166                     # r is a safe, non-lossy encoding of s
   167                     return safelocalstr(r)
   167                     return safelocalstr(r)
   168                 return localstr(u.encode('UTF-8'), r)
   168                 return localstr(u.encode('UTF-8'), r)
   169             except UnicodeDecodeError:
   169             except UnicodeDecodeError:
   170                 u = s.decode("utf-8", "replace") # last ditch
   170                 u = s.decode("utf-8", "replace") # last ditch
   171                 # can't round-trip
   171                 # can't round-trip
   172                 return u.encode(_sysstr(encoding), u"replace")
   172                 return u.encode(_sysstr(encoding), r"replace")
   173     except LookupError as k:
   173     except LookupError as k:
   174         raise error.Abort(k, hint="please check your locale settings")
   174         raise error.Abort(k, hint="please check your locale settings")
   175 
   175 
   176 def fromlocal(s):
   176 def fromlocal(s):
   177     """
   177     """
   228     strmethod = pycompat.identity
   228     strmethod = pycompat.identity
   229 
   229 
   230 if not _nativeenviron:
   230 if not _nativeenviron:
   231     # now encoding and helper functions are available, recreate the environ
   231     # now encoding and helper functions are available, recreate the environ
   232     # dict to be exported to other modules
   232     # dict to be exported to other modules
   233     environ = dict((tolocal(k.encode(u'utf-8')), tolocal(v.encode(u'utf-8')))
   233     environ = dict((tolocal(k.encode(r'utf-8')), tolocal(v.encode(r'utf-8')))
   234                    for k, v in os.environ.items())  # re-exports
   234                    for k, v in os.environ.items())  # re-exports
   235 
   235 
   236 if pycompat.ispy3:
   236 if pycompat.ispy3:
   237     # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
   237     # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
   238     # returns bytes.
   238     # returns bytes.
   249 _wide = _sysstr(environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
   249 _wide = _sysstr(environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
   250                 and "WFA" or "WF")
   250                 and "WFA" or "WF")
   251 
   251 
   252 def colwidth(s):
   252 def colwidth(s):
   253     "Find the column width of a string for display in the local encoding"
   253     "Find the column width of a string for display in the local encoding"
   254     return ucolwidth(s.decode(_sysstr(encoding), u'replace'))
   254     return ucolwidth(s.decode(_sysstr(encoding), r'replace'))
   255 
   255 
   256 def ucolwidth(d):
   256 def ucolwidth(d):
   257     "Find the column width of a Unicode string for display"
   257     "Find the column width of a Unicode string for display"
   258     eaw = getattr(unicodedata, 'east_asian_width', None)
   258     eaw = getattr(unicodedata, 'east_asian_width', None)
   259     if eaw is not None:
   259     if eaw is not None: