# HG changeset patch # User Yuya Nishihara # Date 1451205905 -32400 # Node ID 8ddf893560fa9e664f8cfe7b5ee1e880d6c0f4a3 # Parent f4418ff2f7008c50d636517037ab251506b18dbf templatefilters: add "utf8" to get utf-8 bytes from local-encoding text This will be applied prior to the "|json" filter. This sounds odd, but it is necessary to handle local-encoding text as well as raw filename bytes. Because filenames are bytes in Mercurial and the Unix world, {filename|json} should preserve the original byte sequence, which implies {x|json} -> '"' toutf8b(x) '"' On the other hand, most template strings are in local encoding. Because the "|json" filter has to be byte-transparent to filenames, we need something to annotate an input as a local string; that's what "|utf8" will do. {x|utf8|json} -> '"' toutf8b(fromlocal(x)) '"' "|utf8" is an explicit call, so it aborts if input bytes can't be converted to UTF-8. diff -r f4418ff2f700 -r 8ddf893560fa mercurial/templatefilters.py --- a/mercurial/templatefilters.py Sun Dec 27 17:16:45 2015 +0900 +++ b/mercurial/templatefilters.py Sun Dec 27 17:45:05 2015 +0900 @@ -377,6 +377,10 @@ """:emailuser: Any text. Returns the user portion of an email address.""" return util.emailuser(text) +def utf8(text): + """:utf8: Any text. 
Converts from the local character encoding to UTF-8.""" + return encoding.fromlocal(text) + def xmlescape(text): text = (text .replace('&', '&amp;') @@ -422,6 +426,7 @@ "urlescape": urlescape, "user": userfilter, "emailuser": emailuser, + "utf8": utf8, "xmlescape": xmlescape, } diff -r f4418ff2f700 -r 8ddf893560fa tests/test-command-template.t --- a/tests/test-command-template.t Sun Dec 27 17:16:45 2015 +0900 +++ b/tests/test-command-template.t Sun Dec 27 17:45:05 2015 +0900 @@ -3547,6 +3547,7 @@ $ hg init nonascii $ cd nonascii $ python <<EOF + > open('latin1', 'w').write('\xe9') > open('utf-8', 'w').write('\xc3\xa9') > EOF $ HGENCODING=utf-8 hg branch -q `cat utf-8` @@ -3563,4 +3564,17 @@ $ HGENCODING=ascii hg log -T "{'`cat utf-8`'|json}\n" -l1 "\ufffd\ufffd" +utf8 filter: + + $ HGENCODING=ascii hg log -T "round-trip: {branch|utf8|hex}\n" -r0 + round-trip: c3a9 + $ HGENCODING=latin1 hg log -T "decoded: {'`cat latin1`'|utf8|hex}\n" -l1 + decoded: c3a9 + $ HGENCODING=ascii hg log -T "replaced: {'`cat latin1`'|utf8|hex}\n" -l1 + abort: decoding near * (glob) + [255] + $ hg log -T "invalid type: {rev|utf8}\n" -r0 + abort: template filter 'utf8' is not compatible with keyword 'rev' + [255] + $ cd ..