# HG changeset patch # User Yuya Nishihara # Date 1446648495 -32400 # Node ID baa77652be68a56ba69aac36c36fe8823089fce2 # Parent 9350f00a7b239fa72e598f2d28c603149bde337e templatefilters: try round-trip utf-8 conversion by json filter (issue4933) As JSON string is known to be a unicode, we should try round-trip conversion for localstr type. This patch tests localstr type explicitly because encoding.fromlocal() may raise Abort for undecodable str, which is probably not what we want. Maybe we can refactor json filter to use encoding module more later. Still "{desc|json}" can't round-trip because showdescription() modifies a localstr object. diff -r 9350f00a7b23 -r baa77652be68 mercurial/templatefilters.py --- a/mercurial/templatefilters.py Tue Nov 03 12:16:54 2015 -0800 +++ b/mercurial/templatefilters.py Wed Nov 04 23:48:15 2015 +0900 @@ -197,7 +197,11 @@ return {None: 'null', False: 'false', True: 'true'}[obj] elif isinstance(obj, int) or isinstance(obj, float): return str(obj) + elif isinstance(obj, encoding.localstr): + u = encoding.fromlocal(obj).decode('utf-8') # can round-trip + return '"%s"' % jsonescape(u) elif isinstance(obj, str): + # no encoding.fromlocal() because it may abort if obj can't be decoded u = unicode(obj, encoding.encoding, 'replace') return '"%s"' % jsonescape(u) elif isinstance(obj, unicode): diff -r 9350f00a7b23 -r baa77652be68 tests/test-command-template.t --- a/tests/test-command-template.t Tue Nov 03 12:16:54 2015 -0800 +++ b/tests/test-command-template.t Wed Nov 04 23:48:15 2015 +0900 @@ -3479,3 +3479,26 @@ $ hg log -T "\\xy" -R a hg: parse error: invalid \x escape [255] + +Set up repository for non-ascii encoding tests: + + $ hg init nonascii + $ cd nonascii + $ python <<EOF + > open('utf-8', 'w').write('\xc3\xa9') + > EOF + $ HGENCODING=utf-8 hg branch -q `cat utf-8` + $ HGENCODING=utf-8 hg ci -qAm 'non-ascii branch' utf-8 + +json filter should try round-trip conversion to utf-8: + + $ HGENCODING=ascii hg log -T "{branch|json}\n" -r0 + "\u00e9" 
+ +json filter should not abort if it can't decode bytes: +(not sure the current behavior is right; we might want to use utf-8b encoding?) + + $ HGENCODING=ascii hg log -T "{'`cat utf-8`'|json}\n" -l1 + "\ufffd\ufffd" + + $ cd ..