util: fix ellipsis() not to break multi-byte sequence (issue2564) stable 1.7.3
author Yuya Nishihara <yuya@tcha.org>
Sat, 25 Dec 2010 21:59:00 +0900
branch stable
changeset 13225 e3bf16703e26
parent 13224 0a1eefaf98f2
child 13226 6264b6dedf11
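util: fix ellipsis() not to break multi-byte sequence (issue2564)

ellipsis() now tries to convert the local-encoded string (localstr) to unicode before truncating, so that a multi-byte sequence is never split in the middle. Because we cannot assume that the given text is actually encoded in the local encoding, it falls back to truncating the raw string if a unicode error occurs.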
mercurial/util.py
tests/test-notify.t
--- a/mercurial/util.py	Fri Dec 31 15:14:51 2010 +0100
+++ b/mercurial/util.py	Sat Dec 25 21:59:00 2010 +0900
@@ -1242,12 +1242,23 @@
         r = None
     return author[author.find('<') + 1:r]
 
+def _ellipsis(text, maxlength):
+    if len(text) <= maxlength:
+        return text, False
+    else:
+        return "%s..." % (text[:maxlength - 3]), True
+
 def ellipsis(text, maxlength=400):
     """Trim string to at most maxlength (default: 400) characters."""
-    if len(text) <= maxlength:
-        return text
-    else:
-        return "%s..." % (text[:maxlength - 3])
+    try:
+        # use unicode not to split at intermediate multi-byte sequence
+        utext, truncated = _ellipsis(text.decode(encoding.encoding),
+                                     maxlength)
+        if not truncated:
+            return text
+        return utext.encode(encoding.encoding)
+    except (UnicodeDecodeError, UnicodeEncodeError):
+        return _ellipsis(text, maxlength)[0]
 
 def walkrepos(path, followsym=False, seen_dirs=None, recurse=False):
     '''yield every hg repository under path, recursively.'''
--- a/tests/test-notify.t	Fri Dec 31 15:14:51 2010 +0100
+++ b/tests/test-notify.t	Sat Dec 25 21:59:00 2010 +0900
@@ -302,3 +302,49 @@
   changeset 22c88b85aa27 in b
   description: merge
   (run 'hg update' to get a working copy)
+
+truncate multi-byte subject
+
+  $ cat <<EOF >> $HGRCPATH
+  > [notify]
+  > maxsubject = 4
+  > EOF
+  $ echo a >> a/a
+  $ hg --cwd a --encoding utf-8 commit -A -d '0 0' \
+  >   -m `python -c 'print "\xc3\xa0\xc3\xa1\xc3\xa2\xc3\xa3\xc3\xa4"'`
+  $ hg --traceback --cwd b --encoding utf-8 pull ../a | \
+  >   python -c 'import sys,re; print re.sub("\n\t", " ", sys.stdin.read()),'
+  pulling from ../a
+  searching for changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 1 changesets with 1 changes to 1 files
+  Content-Type: text/plain; charset="us-ascii"
+  MIME-Version: 1.0
+  Content-Transfer-Encoding: 8bit
+  X-Test: foo
+  Date: * (glob)
+  Subject: \xc3\xa0... (esc)
+  From: test@test.com
+  X-Hg-Notification: changeset 4a47f01c1356
+  Message-Id: <*> (glob)
+  To: baz@test.com, foo@bar
+  
+  changeset 4a47f01c1356 in b
+  description: \xc3\xa0\xc3\xa1\xc3\xa2\xc3\xa3\xc3\xa4 (esc)
+  diffstat:
+  
+   a |  1 +
+   1 files changed, 1 insertions(+), 0 deletions(-)
+  
+  diffs (7 lines):
+  
+  diff -r 22c88b85aa27 -r 4a47f01c1356 a
+  --- a/a	Thu Jan 01 00:00:03 1970 +0000
+  +++ b/a	Thu Jan 01 00:00:00 1970 +0000
+  @@ -1,2 +1,3 @@
+   a
+   a
+  +a
+  (run 'hg update' to get a working copy)