move encoding bits from util to encoding
author Matt Mackall <mpm@selenic.com>
Fri, 03 Apr 2009 14:51:48 -0500
changeset 7948 de377b1a9a84
parent 7947 a454eeb1b827
child 7949 443c0c8636ac
move encoding bits from util to encoding. In addition to cleaning up util, this gets rid of some circular dependencies.
hgext/convert/convcmd.py
hgext/highlight/highlight.py
hgext/win32mbcs.py
mercurial/changelog.py
mercurial/cmdutil.py
mercurial/commands.py
mercurial/dispatch.py
mercurial/encoding.py
mercurial/hgweb/hgweb_mod.py
mercurial/hgweb/hgwebdir_mod.py
mercurial/i18n.py
mercurial/localrepo.py
mercurial/mail.py
mercurial/templatefilters.py
mercurial/util.py
mercurial/win32.py
--- a/hgext/convert/convcmd.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/hgext/convert/convcmd.py	Fri Apr 03 14:51:48 2009 -0500
@@ -18,7 +18,7 @@
 import filemap
 
 import os, shutil
-from mercurial import hg, util
+from mercurial import hg, util, encoding
 from mercurial.i18n import _
 
 orig_encoding = 'ascii'
@@ -275,7 +275,7 @@
                 if "\n" in desc:
                     desc = desc.splitlines()[0]
                 # convert log message to local encoding without using
-                # tolocal() because util._encoding conver() use it as
+                # tolocal() because convert() sets encoding.encoding to
                 # 'utf-8'
                 self.ui.status("%d %s\n" % (num, recode(desc)))
                 self.ui.note(_("source: %s\n") % recode(c))
@@ -308,8 +308,8 @@
 
 def convert(ui, src, dest=None, revmapfile=None, **opts):
     global orig_encoding
-    orig_encoding = util._encoding
-    util._encoding = 'UTF-8'
+    orig_encoding = encoding.encoding
+    encoding.encoding = 'UTF-8'
 
     if not dest:
         dest = hg.defaultdest(src) + "-hg"
--- a/hgext/highlight/highlight.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/hgext/highlight/highlight.py	Fri Apr 03 14:51:48 2009 -0500
@@ -6,7 +6,7 @@
 from mercurial import demandimport
 demandimport.ignore.extend(['pkgutil', 'pkg_resources', '__main__',])
 
-from mercurial import util
+from mercurial import util, encoding
 from mercurial.templatefilters import filters
 
 from pygments import highlight
@@ -30,19 +30,19 @@
         return
 
     # avoid UnicodeDecodeError in pygments
-    text = util.tolocal(text)
+    text = encoding.tolocal(text)
 
     # To get multi-line strings right, we can't format line-by-line
     try:
         lexer = guess_lexer_for_filename(fctx.path(), text[:1024],
-                                         encoding=util._encoding)
+                                         encoding=encoding.encoding)
     except (ClassNotFound, ValueError):
         try:
-            lexer = guess_lexer(text[:1024], encoding=util._encoding)
+            lexer = guess_lexer(text[:1024], encoding=encoding.encoding)
         except (ClassNotFound, ValueError):
-            lexer = TextLexer(encoding=util._encoding)
+            lexer = TextLexer(encoding=encoding.encoding)
 
-    formatter = HtmlFormatter(style=style, encoding=util._encoding)
+    formatter = HtmlFormatter(style=style, encoding=encoding.encoding)
 
     colorized = highlight(text, lexer, formatter)
     # strip wrapping div
--- a/hgext/win32mbcs.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/hgext/win32mbcs.py	Fri Apr 03 14:51:48 2009 -0500
@@ -36,19 +36,19 @@
   [extensions]
   hgext.win32mbcs =
 
-Path encoding conversion are done between unicode and util._encoding
+Path encoding conversion are done between unicode and encoding.encoding
 which is decided by mercurial from current locale setting or HGENCODING.
 
 """
 
 import os
 from mercurial.i18n import _
-from mercurial import util
+from mercurial import util, encoding
 
 def decode(arg):
     if isinstance(arg, str):
-        uarg = arg.decode(util._encoding)
-        if arg == uarg.encode(util._encoding):
+        uarg = arg.decode(encoding.encoding)
+        if arg == uarg.encode(encoding.encoding):
             return uarg
         raise UnicodeError("Not local encoding")
     elif isinstance(arg, tuple):
@@ -59,7 +59,7 @@
 
 def encode(arg):
     if isinstance(arg, unicode):
-        return arg.encode(util._encoding)
+        return arg.encode(encoding.encoding)
     elif isinstance(arg, tuple):
         return tuple(map(encode, arg))
     elif isinstance(arg, list):
@@ -76,10 +76,10 @@
         # convert arguments to unicode, call func, then convert back
         return encode(func(*decode(args)))
     except UnicodeError:
-        # If not encoded with util._encoding, report it then
+        # If not encoded with encoding.encoding, report it then
         # continue with calling original function.
         raise util.Abort(_("[win32mbcs] filename conversion fail with"
-                         " %s encoding\n") % (util._encoding))
+                         " %s encoding\n") % (encoding.encoding))
 
 def wrapname(name):
     idx = name.rfind('.')
@@ -115,8 +115,9 @@
         return
 
     # fake is only for relevant environment.
-    if util._encoding.lower() in problematic_encodings.split():
+    if encoding.encoding.lower() in problematic_encodings.split():
         for f in funcs.split():
             wrapname(f)
-        ui.debug(_("[win32mbcs] activated with encoding: %s\n") % util._encoding)
+        ui.debug(_("[win32mbcs] activated with encoding: %s\n")
+                 % encoding.encoding)
 
--- a/mercurial/changelog.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/changelog.py	Fri Apr 03 14:51:48 2009 -0500
@@ -7,7 +7,7 @@
 
 from node import bin, hex, nullid
 from i18n import _
-import util, error, revlog
+import util, error, revlog, encoding
 
 def _string_escape(text):
     """
@@ -175,10 +175,10 @@
         if not text:
             return (nullid, "", (0, 0), [], "", {'branch': 'default'})
         last = text.index("\n\n")
-        desc = util.tolocal(text[last + 2:])
+        desc = encoding.tolocal(text[last + 2:])
         l = text[:last].split('\n')
         manifest = bin(l[0])
-        user = util.tolocal(l[1])
+        user = encoding.tolocal(l[1])
 
         extra_data = l[2].split(' ', 2)
         if len(extra_data) != 3:
@@ -205,7 +205,7 @@
         if "\n" in user:
             raise error.RevlogError(_("username %s contains a newline")
                                     % repr(user))
-        user, desc = util.fromlocal(user), util.fromlocal(desc)
+        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
 
         if date:
             parseddate = "%d %d" % util.parsedate(date)
--- a/mercurial/cmdutil.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/cmdutil.py	Fri Apr 03 14:51:48 2009 -0500
@@ -7,7 +7,7 @@
 
 from node import hex, nullid, nullrev, short
 from i18n import _
-import os, sys, bisect, stat
+import os, sys, bisect, stat, encoding
 import mdiff, bdiff, util, templater, templatefilters, patch, errno, error
 import match as _match
 
@@ -626,7 +626,7 @@
 
         # don't show the default branch name
         if branch != 'default':
-            branch = util.tolocal(branch)
+            branch = encoding.tolocal(branch)
             self.ui.write(_("branch:      %s\n") % branch)
         for tag in self.repo.nodetags(changenode):
             self.ui.write(_("tag:         %s\n") % tag)
@@ -791,7 +791,7 @@
         def showbranches(**args):
             branch = ctx.branch()
             if branch != 'default':
-                branch = util.tolocal(branch)
+                branch = encoding.tolocal(branch)
                 return showlist('branch', [branch], plural='branches', **args)
 
         def showparents(**args):
--- a/mercurial/commands.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/commands.py	Fri Apr 03 14:51:48 2009 -0500
@@ -9,7 +9,7 @@
 from i18n import _, gettext
 import os, re, sys
 import hg, util, revlog, bundlerepo, extensions, copies, context, error
-import difflib, patch, time, help, mdiff, tempfile, url
+import difflib, patch, time, help, mdiff, tempfile, url, encoding
 import archival, changegroup, cmdutil, hgweb.server, sshserver, hbisect
 import merge as merge_
 
@@ -415,10 +415,10 @@
             if label not in [p.branch() for p in repo.parents()]:
                 raise util.Abort(_('a branch of the same name already exists'
                                    ' (use --force to override)'))
-        repo.dirstate.setbranch(util.fromlocal(label))
+        repo.dirstate.setbranch(encoding.fromlocal(label))
         ui.status(_('marked working directory as branch %s\n') % label)
     else:
-        ui.write("%s\n" % util.tolocal(repo.dirstate.branch()))
+        ui.write("%s\n" % encoding.tolocal(repo.dirstate.branch()))
 
 def branches(ui, repo, active=False):
     """list repository named branches
@@ -431,7 +431,7 @@
     Use the command 'hg update' to switch to an existing branch.
     """
     hexfunc = ui.debugflag and hex or short
-    activebranches = [util.tolocal(repo[n].branch())
+    activebranches = [encoding.tolocal(repo[n].branch())
                             for n in repo.heads(closed=False)]
     branches = util.sort([(tag in activebranches, repo.changelog.rev(node), tag)
                           for tag, node in repo.branchtags().items()])
@@ -449,7 +449,7 @@
                     notice = ' (closed)'
                 else:
                     notice = ' (inactive)'
-                rev = str(node).rjust(31 - util.colwidth(tag))
+                rev = str(node).rjust(31 - encoding.colwidth(tag))
                 data = tag, rev, hexfunc(hn), notice
                 ui.write("%s %s:%s%s\n" % data)
 
@@ -882,9 +882,9 @@
     problems = 0
 
     # encoding
-    ui.status(_("Checking encoding (%s)...\n") % util._encoding)
+    ui.status(_("Checking encoding (%s)...\n") % encoding.encoding)
     try:
-        util.fromlocal("test")
+        encoding.fromlocal("test")
     except util.Abort, inst:
         ui.write(" %s\n" % inst)
         ui.write(_(" (check that your locale is properly set)\n"))
@@ -1579,7 +1579,7 @@
             output.append(str(ctx.rev()))
 
     if repo.local() and default and not ui.quiet:
-        b = util.tolocal(ctx.branch())
+        b = encoding.tolocal(ctx.branch())
         if b != 'default':
             output.append("(%s)" % b)
 
@@ -1589,7 +1589,7 @@
             output.append(t)
 
     if branch:
-        output.append(util.tolocal(ctx.branch()))
+        output.append(encoding.tolocal(ctx.branch()))
 
     if tags:
         output.extend(ctx.tags())
@@ -2855,7 +2855,7 @@
         except error.LookupError:
             r = "    ?:%s" % hn
         else:
-            spaces = " " * (30 - util.colwidth(t))
+            spaces = " " * (30 - encoding.colwidth(t))
             if ui.verbose:
                 if repo.tagtype(t) == 'local':
                     tagtype = " local"
@@ -2976,8 +2976,9 @@
     ('', 'config', [], _('set/override config option')),
     ('', 'debug', None, _('enable debugging output')),
     ('', 'debugger', None, _('start debugger')),
-    ('', 'encoding', util._encoding, _('set the charset encoding')),
-    ('', 'encodingmode', util._encodingmode, _('set the charset encoding mode')),
+    ('', 'encoding', encoding.encoding, _('set the charset encoding')),
+    ('', 'encodingmode', encoding.encodingmode,
+     _('set the charset encoding mode')),
     ('', 'lsprof', None, _('print improved command execution profile')),
     ('', 'traceback', None, _('print traceback on exception')),
     ('', 'time', None, _('time how long the command takes')),
--- a/mercurial/dispatch.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/dispatch.py	Fri Apr 03 14:51:48 2009 -0500
@@ -8,7 +8,7 @@
 from i18n import _
 import os, sys, atexit, signal, pdb, socket, errno, shlex, time
 import util, commands, hg, fancyopts, extensions, hook, error
-import cmdutil
+import cmdutil, encoding
 import ui as _ui
 
 def run():
@@ -304,7 +304,7 @@
     # check for fallback encoding
     fallback = lui.config('ui', 'fallbackencoding')
     if fallback:
-        util._fallbackencoding = fallback
+        encoding.fallbackencoding = fallback
 
     fullargs = args
     cmd, func, args, options, cmdoptions = _parse(lui, args)
@@ -319,9 +319,9 @@
             "and --repository may only be abbreviated as --repo!"))
 
     if options["encoding"]:
-        util._encoding = options["encoding"]
+        encoding.encoding = options["encoding"]
     if options["encodingmode"]:
-        util._encodingmode = options["encodingmode"]
+        encoding.encodingmode = options["encodingmode"]
     if options["time"]:
         def get_times():
             t = os.times()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/encoding.py	Fri Apr 03 14:51:48 2009 -0500
@@ -0,0 +1,77 @@
+"""
+encoding.py - character transcoding support for Mercurial
+
+ Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
+
+This software may be used and distributed according to the terms of
+the GNU General Public License version 2, incorporated herein by
+reference.
+"""
+
+import sys, unicodedata, locale, os, error
+
+_encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}
+
+try:
+    encoding = os.environ.get("HGENCODING")
+    if sys.platform == 'darwin' and not encoding:
+        # On darwin, getpreferredencoding ignores the locale environment and
+        # always returns mac-roman. We override this if the environment is
+        # not C (has been customized by the user).
+        locale.setlocale(locale.LC_CTYPE, '')
+        encoding = locale.getlocale()[1]
+    if not encoding:
+        encoding = locale.getpreferredencoding() or 'ascii'
+        encoding = _encodingfixup.get(encoding, encoding)
+except locale.Error:
+    encoding = 'ascii'
+encodingmode = os.environ.get("HGENCODINGMODE", "strict")
+fallbackencoding = 'ISO-8859-1'
+
+def tolocal(s):
+    """
+    Convert a string from internal UTF-8 to local encoding
+
+    All internal strings should be UTF-8 but some repos before the
+    implementation of locale support may contain latin1 or possibly
+    other character sets. We attempt to decode everything strictly
+    using UTF-8, then fallbackencoding (Latin-1 by default), and
+    failing that, we use UTF-8 and replace unknown characters.
+    """
+    for e in ('UTF-8', fallbackencoding):
+        try:
+            u = s.decode(e) # attempt strict decoding
+            return u.encode(encoding, "replace")
+        except LookupError, k:
+            raise error.Abort("%s, please check your locale settings" % k)
+        except UnicodeDecodeError:
+            pass
+    u = s.decode("utf-8", "replace") # last ditch
+    return u.encode(encoding, "replace")
+
+def fromlocal(s):
+    """
+    Convert a string from the local character encoding to UTF-8
+
+    We attempt to decode strings using the encoding mode set by
+    HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
+    characters will cause an error message. Other modes include
+    'replace', which replaces unknown characters with a special
+    Unicode character, and 'ignore', which drops the character.
+    """
+    try:
+        return s.decode(encoding, encodingmode).encode("utf-8")
+    except UnicodeDecodeError, inst:
+        sub = s[max(0, inst.start-10):inst.start+10]
+        raise error.Abort("decoding near '%s': %s!" % (sub, inst))
+    except LookupError, k:
+        raise error.Abort("%s, please check your locale settings" % k)
+
+def colwidth(s):
+    "Find the column width of a local-encoding string for display"
+    d = s.decode(encoding, 'replace')
+    if hasattr(unicodedata, 'east_asian_width'):
+        w = unicodedata.east_asian_width
+        return sum([w(c) in 'WF' and 2 or 1 for c in d])
+    return len(d)
+
--- a/mercurial/hgweb/hgweb_mod.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/hgweb/hgweb_mod.py	Fri Apr 03 14:51:48 2009 -0500
@@ -7,7 +7,7 @@
 # of the GNU General Public License, incorporated herein by reference.
 
 import os
-from mercurial import ui, hg, util, hook, error
+from mercurial import ui, hg, util, hook, error, encoding
 from mercurial import templater, templatefilters
 from common import get_mtime, style_map, ErrorResponse
 from common import HTTP_OK, HTTP_BAD_REQUEST, HTTP_NOT_FOUND, HTTP_SERVER_ERROR
@@ -65,7 +65,7 @@
             self.maxshortchanges = int(self.config("web", "maxshortchanges", 60))
             self.maxfiles = int(self.config("web", "maxfiles", 10))
             self.allowpull = self.configbool("web", "allowpull", True)
-            self.encoding = self.config("web", "encoding", util._encoding)
+            self.encoding = self.config("web", "encoding", encoding.encoding)
 
     def run(self):
         if not os.environ.get('GATEWAY_INTERFACE', '').startswith("CGI/1."):
--- a/mercurial/hgweb/hgwebdir_mod.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/hgweb/hgwebdir_mod.py	Fri Apr 03 14:51:48 2009 -0500
@@ -8,7 +8,7 @@
 
 import os
 from mercurial.i18n import _
-from mercurial import ui, hg, util, templater, templatefilters, error
+from mercurial import ui, hg, util, templater, templatefilters, error, encoding
 from common import ErrorResponse, get_mtime, staticfile, style_map, paritygen,\
                    get_contact, HTTP_OK, HTTP_NOT_FOUND, HTTP_SERVER_ERROR
 from hgweb_mod import hgweb
@@ -119,7 +119,7 @@
 
                 virtual = req.env.get("PATH_INFO", "").strip('/')
                 tmpl = self.templater(req)
-                ctype = tmpl('mimetype', encoding=util._encoding)
+                ctype = tmpl('mimetype', encoding=encoding.encoding)
                 ctype = templater.stringify(ctype)
 
                 # a static file
@@ -285,7 +285,7 @@
     def templater(self, req):
 
         def header(**map):
-            yield tmpl('header', encoding=util._encoding, **map)
+            yield tmpl('header', encoding=encoding.encoding, **map)
 
         def footer(**map):
             yield tmpl("footer", **map)
--- a/mercurial/i18n.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/i18n.py	Fri Apr 03 14:51:48 2009 -0500
@@ -7,7 +7,7 @@
 of the GNU General Public License, incorporated herein by reference.
 """
 
-import gettext, sys, os
+import gettext, sys, os, encoding
 
 # modelled after templater.templatepath:
 if hasattr(sys, 'frozen'):
@@ -37,15 +37,13 @@
     if message is None:
         return message
 
-    # We cannot just run the text through util.tolocal since that
-    # leads to infinite recursion when util._encoding is invalid.
+    # We cannot just run the text through encoding.tolocal since that
+    # leads to infinite recursion when encoding.encoding is invalid.
     try:
         u = t.ugettext(message)
-        return u.encode(util._encoding, "replace")
+        return u.encode(encoding.encoding, "replace")
     except LookupError:
         return message
 
 _ = gettext
 
-# Moved after _ because of circular import.
-import util
--- a/mercurial/localrepo.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/localrepo.py	Fri Apr 03 14:51:48 2009 -0500
@@ -9,7 +9,7 @@
 from i18n import _
 import repo, changegroup
 import changelog, dirstate, filelog, manifest, context, weakref
-import lock, transaction, stat, errno, ui, store
+import lock, transaction, stat, errno, ui, store, encoding
 import os, time, util, extensions, hook, inspect, error
 import match as match_
 import merge as merge_
@@ -188,7 +188,7 @@
                 fp.write(prevtags)
 
         # committed tags are stored in UTF-8
-        writetags(fp, names, util.fromlocal, prevtags)
+        writetags(fp, names, encoding.fromlocal, prevtags)
 
         if use_dirstate and '.hgtags' not in self.dirstate:
             self.add(['.hgtags'])
@@ -254,7 +254,7 @@
                     warn(_("cannot parse entry"))
                     continue
                 node, key = s
-                key = util.tolocal(key.strip()) # stored in UTF-8
+                key = encoding.tolocal(key.strip()) # stored in UTF-8
                 try:
                     bin_n = bin(node)
                 except TypeError:
@@ -297,7 +297,7 @@
             readtags(f.data().splitlines(), f, "global")
 
         try:
-            data = util.fromlocal(self.opener("localtags").read())
+            data = encoding.fromlocal(self.opener("localtags").read())
             # localtags are stored in the local character set
             # while the internal tag table is stored in UTF-8
             readtags(data.splitlines(), "localtags", "local")
@@ -397,7 +397,7 @@
         # the branch cache is stored on disk as UTF-8, but in the local
         # charset internally
         for k, v in partial.iteritems():
-            self.branchcache[util.tolocal(k)] = v
+            self.branchcache[encoding.tolocal(k)] = v
         return self.branchcache
 
 
@@ -647,7 +647,7 @@
                 except IOError:
                     self.ui.warn(_("Named branch could not be reset, "
                                    "current branch still is: %s\n")
-                                 % util.tolocal(self.dirstate.branch()))
+                                 % encoding.tolocal(self.dirstate.branch()))
                 self.invalidate()
                 self.dirstate.invalidate()
             else:
@@ -943,7 +943,8 @@
                 if p2 != nullid:
                     edittext.append("HG: branch merge")
                 if branchname:
-                    edittext.append("HG: branch '%s'" % util.tolocal(branchname))
+                    edittext.append("HG: branch '%s'"
+                                    % encoding.tolocal(branchname))
                 edittext.extend(["HG: added %s" % f for f in added])
                 edittext.extend(["HG: changed %s" % f for f in updated])
                 edittext.extend(["HG: removed %s" % f for f in removed])
--- a/mercurial/mail.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/mail.py	Fri Apr 03 14:51:48 2009 -0500
@@ -8,7 +8,7 @@
 from i18n import _
 import os, smtplib, socket
 import email.Header, email.MIMEText, email.Utils
-import util
+import util, encoding
 
 def _smtp(ui):
     '''build an smtp connection and return a function to send mail'''
@@ -100,8 +100,8 @@
 def _charsets(ui):
     '''Obtains charsets to send mail parts not containing patches.'''
     charsets = [cs.lower() for cs in ui.configlist('email', 'charsets')]
-    fallbacks = [util._fallbackencoding.lower(),
-                 util._encoding.lower(), 'utf-8']
+    fallbacks = [encoding.fallbackencoding.lower(),
+                 encoding.encoding.lower(), 'utf-8']
     for cs in fallbacks: # util.unique does not keep order
         if cs not in charsets:
             charsets.append(cs)
@@ -110,14 +110,14 @@
 def _encode(ui, s, charsets):
     '''Returns (converted) string, charset tuple.
     Finds out best charset by cycling through sendcharsets in descending
-    order. Tries both _encoding and _fallbackencoding for input. Only as
+    order. Tries both encoding and fallbackencoding for input. Only as
     last resort send as is in fake ascii.
     Caveat: Do not use for mail parts containing patches!'''
     try:
         s.decode('ascii')
     except UnicodeDecodeError:
         sendcharsets = charsets or _charsets(ui)
-        for ics in (util._encoding, util._fallbackencoding):
+        for ics in (encoding.encoding, encoding.fallbackencoding):
             try:
                 u = s.decode(ics)
             except UnicodeDecodeError:
--- a/mercurial/templatefilters.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/templatefilters.py	Fri Apr 03 14:51:48 2009 -0500
@@ -6,7 +6,7 @@
 # of the GNU General Public License, incorporated herein by reference.
 
 import cgi, re, os, time, urllib, textwrap
-import util, templater
+import util, templater, encoding
 
 agescales = [("second", 1),
              ("minute", 60),
@@ -76,7 +76,7 @@
     return text.replace('\n', '<br/>\n')
 
 def obfuscate(text):
-    text = unicode(text, util._encoding, 'replace')
+    text = unicode(text, encoding.encoding, 'replace')
     return ''.join(['&#%d;' % ord(c) for c in text])
 
 def domain(author):
--- a/mercurial/util.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/util.py	Fri Apr 03 14:51:48 2009 -0500
@@ -14,8 +14,8 @@
 
 from i18n import _
 import cStringIO, errno, re, shutil, sys, tempfile, traceback, error
-import os, stat, threading, time, calendar, ConfigParser, locale, glob, osutil
-import imp, unicodedata
+import os, stat, threading, time, calendar, ConfigParser, glob, osutil
+import imp
 
 # Python compatibility
 
@@ -81,71 +81,6 @@
     popen3 = os.popen3
 
 
-_encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}
-
-try:
-    _encoding = os.environ.get("HGENCODING")
-    if sys.platform == 'darwin' and not _encoding:
-        # On darwin, getpreferredencoding ignores the locale environment and
-        # always returns mac-roman. We override this if the environment is
-        # not C (has been customized by the user).
-        locale.setlocale(locale.LC_CTYPE, '')
-        _encoding = locale.getlocale()[1]
-    if not _encoding:
-        _encoding = locale.getpreferredencoding() or 'ascii'
-        _encoding = _encodingfixup.get(_encoding, _encoding)
-except locale.Error:
-    _encoding = 'ascii'
-_encodingmode = os.environ.get("HGENCODINGMODE", "strict")
-_fallbackencoding = 'ISO-8859-1'
-
-def tolocal(s):
-    """
-    Convert a string from internal UTF-8 to local encoding
-
-    All internal strings should be UTF-8 but some repos before the
-    implementation of locale support may contain latin1 or possibly
-    other character sets. We attempt to decode everything strictly
-    using UTF-8, then Latin-1, and failing that, we use UTF-8 and
-    replace unknown characters.
-    """
-    for e in ('UTF-8', _fallbackencoding):
-        try:
-            u = s.decode(e) # attempt strict decoding
-            return u.encode(_encoding, "replace")
-        except LookupError, k:
-            raise Abort(_("%s, please check your locale settings") % k)
-        except UnicodeDecodeError:
-            pass
-    u = s.decode("utf-8", "replace") # last ditch
-    return u.encode(_encoding, "replace")
-
-def fromlocal(s):
-    """
-    Convert a string from the local character encoding to UTF-8
-
-    We attempt to decode strings using the encoding mode set by
-    HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
-    characters will cause an error message. Other modes include
-    'replace', which replaces unknown characters with a special
-    Unicode character, and 'ignore', which drops the character.
-    """
-    try:
-        return s.decode(_encoding, _encodingmode).encode("utf-8")
-    except UnicodeDecodeError, inst:
-        sub = s[max(0, inst.start-10):inst.start+10]
-        raise Abort("decoding near '%s': %s!" % (sub, inst))
-    except LookupError, k:
-        raise Abort(_("%s, please check your locale settings") % k)
-
-def colwidth(s):
-    "Find the column width of a UTF-8 string for display"
-    d = s.decode(_encoding, 'replace')
-    if hasattr(unicodedata, 'east_asian_width'):
-        w = unicodedata.east_asian_width
-        return sum([w(c) in 'WF' and 2 or 1 for c in d])
-    return len(d)
-
 def version():
     """Return version information if available."""
     try:
--- a/mercurial/win32.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/win32.py	Fri Apr 03 14:51:48 2009 -0500
@@ -16,7 +16,7 @@
 
 import errno, os, sys, pywintypes, win32con, win32file, win32process
 import cStringIO, winerror
-import osutil
+import osutil, encoding
 import util
 from win32com.shell import shell,shellcon
 
@@ -213,7 +213,7 @@
         try:
             val = QueryValueEx(OpenKey(s, key), valname)[0]
             # never let a Unicode string escape into the wild
-            return util.tolocal(val.encode('UTF-8'))
+            return encoding.tolocal(val.encode('UTF-8'))
         except EnvironmentError:
             pass