compat: initialize LC_CTYPE locale on all Python versions and platforms
author Manuel Jacob <me@manueljacob.de>
Fri, 26 Jun 2020 04:07:50 +0200
changeset 45001 a25343d16ebe
parent 45000 3fadbdc47aed
child 45002 d2227d4c9e6b
compat: initialize LC_CTYPE locale on all Python versions and platforms

Previously, the LC_CTYPE locale was not initialized according to user settings on all Python versions (e.g. never on Python 2) and platforms (e.g. not on some Python < 3.8 on Windows). This broke e.g. non-ASCII filenames passed to the Subversion bindings on Python 2, resulting in error messages like "file:///tmp/a%C3%A4 does not look like a Subversion repository to libsvn version 1.14.0".

The following command could be used to test this functionality. Adding it to the test suite would be pointless, as the locale is always set to "C" during test runs.

    @command(b'check_initial_codeset', norepo=True)
    def check_initial_codeset(ui):
        codeset1 = locale.nl_langinfo(locale.CODESET)
        locale.setlocale(locale.LC_ALL, '')
        codeset2 = locale.nl_langinfo(locale.CODESET)
        assert codeset1 == codeset2
mercurial/pycompat.py
--- a/mercurial/pycompat.py	Thu Jun 25 10:32:51 2020 -0700
+++ b/mercurial/pycompat.py	Fri Jun 26 04:07:50 2020 +0200
@@ -13,6 +13,7 @@
 import getopt
 import inspect
 import json
+import locale
 import os
 import shlex
 import sys
@@ -93,6 +94,26 @@
     return _rapply(f, xs)
 
 
+# Passing the '' locale means that the locale should be set according to the
+# user settings (environment variables).
+# Python sometimes avoids setting the global locale settings. When interfacing
+# with C code (e.g. the curses module or the Subversion bindings), the global
+# locale settings must be initialized correctly. Python 2 does not initialize
+# the global locale settings on interpreter startup. Python 3 sometimes
+# initializes LC_CTYPE, but not consistently at least on Windows. Therefore we
+# explicitly initialize it to get consistent behavior if it's not already
+# initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d,
+# LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check
+# if we can remove this code.
+if locale.setlocale(locale.LC_CTYPE, None) == 'C':
+    try:
+        locale.setlocale(locale.LC_CTYPE, '')
+    except locale.Error:
+        # The likely case is that the locale from the environment variables is
+        # unknown.
+        pass
+
+
 if ispy3:
     import builtins
     import codecs