merge with stable
author Yuya Nishihara <yuya@tcha.org>
Mon, 29 Jun 2020 20:53:32 +0900
changeset 45019 4a503c1b664a
parent 45018 f2dc337117b9 (current diff)
parent 45013 bd0f122f3f51 (diff)
child 45025 24b1a8eb73aa
merge with stable
hgext/convert/subversion.py
hgext/histedit.py
mercurial/crecord.py
mercurial/util.py
tests/run-tests.py
--- a/hgext/convert/subversion.py	Sun Jun 28 17:52:29 2020 +0200
+++ b/hgext/convert/subversion.py	Mon Jun 29 20:53:32 2020 +0900
@@ -187,13 +187,14 @@
     """Fetch SVN log in a subprocess and channel them back to parent to
     avoid memory collection issues.
     """
-    if svn is None:
-        raise error.Abort(
-            _(b'debugsvnlog could not load Subversion python bindings')
-        )
+    with util.with_lc_ctype():
+        if svn is None:
+            raise error.Abort(
+                _(b'debugsvnlog could not load Subversion python bindings')
+            )
 
-    args = decodeargs(ui.fin.read())
-    get_log_child(ui.fout, *args)
+        args = decodeargs(ui.fin.read())
+        get_log_child(ui.fout, *args)
 
 
 class logstream(object):
@@ -420,18 +421,19 @@
         self.url = geturl(url)
         self.encoding = b'UTF-8'  # Subversion is always nominal UTF-8
         try:
-            self.transport = transport.SvnRaTransport(url=self.url)
-            self.ra = self.transport.ra
-            self.ctx = self.transport.client
-            self.baseurl = svn.ra.get_repos_root(self.ra)
-            # Module is either empty or a repository path starting with
-            # a slash and not ending with a slash.
-            self.module = urlreq.unquote(self.url[len(self.baseurl) :])
-            self.prevmodule = None
-            self.rootmodule = self.module
-            self.commits = {}
-            self.paths = {}
-            self.uuid = svn.ra.get_uuid(self.ra)
+            with util.with_lc_ctype():
+                self.transport = transport.SvnRaTransport(url=self.url)
+                self.ra = self.transport.ra
+                self.ctx = self.transport.client
+                self.baseurl = svn.ra.get_repos_root(self.ra)
+                # Module is either empty or a repository path starting with
+                # a slash and not ending with a slash.
+                self.module = urlreq.unquote(self.url[len(self.baseurl) :])
+                self.prevmodule = None
+                self.rootmodule = self.module
+                self.commits = {}
+                self.paths = {}
+                self.uuid = svn.ra.get_uuid(self.ra)
         except svn.core.SubversionException:
             ui.traceback()
             svnversion = b'%d.%d.%d' % (
@@ -477,7 +479,8 @@
             )
 
         try:
-            self.head = self.latest(self.module, latest)
+            with util.with_lc_ctype():
+                self.head = self.latest(self.module, latest)
         except SvnPathNotFound:
             self.head = None
         if not self.head:
@@ -494,6 +497,13 @@
             self.wc = None
         self.convertfp = None
 
+    def before(self):
+        self.with_lc_ctype = util.with_lc_ctype()
+        self.with_lc_ctype.__enter__()
+
+    def after(self):
+        self.with_lc_ctype.__exit__(None, None, None)
+
     def setrevmap(self, revmap):
         lastrevs = {}
         for revid in revmap:
--- a/hgext/histedit.py	Sun Jun 28 17:52:29 2020 +0200
+++ b/hgext/histedit.py	Mon Jun 29 20:53:32 2020 +0900
@@ -201,7 +201,6 @@
     termios = None
 
 import functools
-import locale
 import os
 import struct
 
@@ -1711,11 +1710,8 @@
         ctxs = []
         for i, r in enumerate(revs):
             ctxs.append(histeditrule(ui, repo[r], i))
-        # Curses requires setting the locale or it will default to the C
-        # locale. This sets the locale to the user's default system
-        # locale.
-        locale.setlocale(locale.LC_ALL, '')
-        rc = curses.wrapper(functools.partial(_chisteditmain, repo, ctxs))
+        with util.with_lc_ctype():
+            rc = curses.wrapper(functools.partial(_chisteditmain, repo, ctxs))
         curses.echo()
         curses.endwin()
         if rc is False:
--- a/mercurial/crecord.py	Sun Jun 28 17:52:29 2020 +0200
+++ b/mercurial/crecord.py	Mon Jun 29 20:53:32 2020 +0900
@@ -10,7 +10,6 @@
 
 from __future__ import absolute_import
 
-import locale
 import os
 import re
 import signal
@@ -566,14 +565,12 @@
     """
     ui.write(_(b'starting interactive selection\n'))
     chunkselector = curseschunkselector(headerlist, ui, operation)
-    # This is required for ncurses to display non-ASCII characters in
-    # default user locale encoding correctly.  --immerrr
-    locale.setlocale(locale.LC_ALL, '')
     origsigtstp = sentinel = object()
     if util.safehasattr(signal, b'SIGTSTP'):
         origsigtstp = signal.getsignal(signal.SIGTSTP)
     try:
-        curses.wrapper(chunkselector.main)
+        with util.with_lc_ctype():
+            curses.wrapper(chunkselector.main)
         if chunkselector.initexc is not None:
             raise chunkselector.initexc
         # ncurses does not restore signal handler for SIGTSTP
--- a/mercurial/util.py	Sun Jun 28 17:52:29 2020 +0200
+++ b/mercurial/util.py	Mon Jun 29 20:53:32 2020 +0900
@@ -22,6 +22,7 @@
 import gc
 import hashlib
 import itertools
+import locale
 import mmap
 import os
 import platform as pyplatform
@@ -3596,3 +3597,32 @@
         if not (byte & 0x80):
             return result
         shift += 7
+
+
+# Passing the '' locale means that the locale should be set according to the
+# user settings (environment variables).
+# Python sometimes avoids setting the global locale settings. When interfacing
+# with C code (e.g. the curses module or the Subversion bindings), the global
+# locale settings must be initialized correctly. Python 2 does not initialize
+# the global locale settings on interpreter startup. Python 3 sometimes
+# initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
+# explicitly initialize it to get consistent behavior if it's not already
+# initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
+# LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
+# if we can remove this code.
+@contextlib.contextmanager
+def with_lc_ctype():
+    oldloc = locale.setlocale(locale.LC_CTYPE, None)
+    if oldloc == 'C':
+        try:
+            try:
+                locale.setlocale(locale.LC_CTYPE, '')
+            except locale.Error:
+                # The likely case is that the locale from the environment
+                # variables is unknown.
+                pass
+            yield
+        finally:
+            locale.setlocale(locale.LC_CTYPE, oldloc)
+    else:
+        yield
--- a/tests/run-tests.py	Sun Jun 28 17:52:29 2020 +0200
+++ b/tests/run-tests.py	Mon Jun 29 20:53:32 2020 +0900
@@ -2069,7 +2069,7 @@
         if el.endswith(b" (esc)\n"):
             if PYTHON3:
                 el = el[:-7].decode('unicode_escape') + '\n'
-                el = el.encode('utf-8')
+                el = el.encode('latin-1')
             else:
                 el = el[:-7].decode('string-escape') + '\n'
         if el == l or os.name == 'nt' and el[:-1] + b'\r\n' == l:
--- a/tests/test-unified-test.t	Sun Jun 28 17:52:29 2020 +0200
+++ b/tests/test-unified-test.t	Mon Jun 29 20:53:32 2020 +0900
@@ -75,6 +75,16 @@
   crlf\r (esc)
 #endif
 
+Escapes:
+
+  $ $PYTHON -c 'from mercurial.utils.procutil import stdout; stdout.write(b"\xff")'
+  \xff (no-eol) (esc)
+
+Escapes with conditions:
+
+  $ $PYTHON -c 'from mercurial.utils.procutil import stdout; stdout.write(b"\xff")'
+  \xff (no-eol) (esc) (true !)
+
 Combining esc with other markups - and handling lines ending with \r instead of \n:
 
   $ printf 'foo/bar\r'