templatefilters: add "utf8" to get utf-8 bytes from local-encoding text
author Yuya Nishihara <yuya@tcha.org>
Sun, 27 Dec 2015 17:45:05 +0900
changeset 28209 8ddf893560fa
parent 28208 f4418ff2f700
child 28210 964ad57eff30
templatefilters: add "utf8" to get utf-8 bytes from local-encoding text This will be applied prior to the "|json" filter. This sounds odd, but it is necessary to handle local-encoding text as well as raw filename bytes. Because filenames are bytes in the Mercurial and Unix world, {filename|json} should preserve the original byte sequence, which implies {x|json} -> '"' toutf8b(x) '"' On the other hand, most template strings are in the local encoding. Because the "|json" filter has to be byte-transparent to filenames, we need something to annotate an input as a local string, which is what "|utf8" will do. {x|utf8|json} -> '"' toutf8b(fromlocal(x)) '"' "|utf8" is an explicit call, so it aborts if the input bytes can't be converted to UTF-8.
mercurial/templatefilters.py
tests/test-command-template.t
--- a/mercurial/templatefilters.py	Sun Dec 27 17:16:45 2015 +0900
+++ b/mercurial/templatefilters.py	Sun Dec 27 17:45:05 2015 +0900
@@ -377,6 +377,10 @@
     """:emailuser: Any text. Returns the user portion of an email address."""
     return util.emailuser(text)
 
+def utf8(text):
+    """:utf8: Any text. Converts from the local character encoding to UTF-8."""
+    return encoding.fromlocal(text)
+
 def xmlescape(text):
     text = (text
             .replace('&', '&amp;')
@@ -422,6 +426,7 @@
     "urlescape": urlescape,
     "user": userfilter,
     "emailuser": emailuser,
+    "utf8": utf8,
     "xmlescape": xmlescape,
 }
 
--- a/tests/test-command-template.t	Sun Dec 27 17:16:45 2015 +0900
+++ b/tests/test-command-template.t	Sun Dec 27 17:45:05 2015 +0900
@@ -3547,6 +3547,7 @@
   $ hg init nonascii
   $ cd nonascii
   $ python <<EOF
+  > open('latin1', 'w').write('\xe9')
   > open('utf-8', 'w').write('\xc3\xa9')
   > EOF
   $ HGENCODING=utf-8 hg branch -q `cat utf-8`
@@ -3563,4 +3564,17 @@
   $ HGENCODING=ascii hg log -T "{'`cat utf-8`'|json}\n" -l1
   "\ufffd\ufffd"
 
+utf8 filter:
+
+  $ HGENCODING=ascii hg log -T "round-trip: {branch|utf8|hex}\n" -r0
+  round-trip: c3a9
+  $ HGENCODING=latin1 hg log -T "decoded: {'`cat latin1`'|utf8|hex}\n" -l1
+  decoded: c3a9
+  $ HGENCODING=ascii hg log -T "replaced: {'`cat latin1`'|utf8|hex}\n" -l1
+  abort: decoding near * (glob)
+  [255]
+  $ hg log -T "invalid type: {rev|utf8}\n" -r0
+  abort: template filter 'utf8' is not compatible with keyword 'rev'
+  [255]
+
   $ cd ..