util: clarify purpose of MBTextWrapper class
author Nicolas Dumazet <nicdumz.commits@gmail.com>
Tue, 09 Nov 2010 13:43:35 +0900
changeset 12957 9f2ac318b92e
parent 12956 0406682c6905
child 12958 8957c39867f6
util: clarify purpose of MBTextWrapper class. It's easy to get confused and scared of a Unicode monster when skimming through this code: document that this is really just about column-counting.
mercurial/util.py
--- a/mercurial/util.py	Mon Nov 08 17:29:23 2010 -0600
+++ b/mercurial/util.py	Tue Nov 09 13:43:35 2010 +0900
@@ -1328,15 +1328,26 @@
 #### naming convention of below implementation follows 'textwrap' module
 
 class MBTextWrapper(textwrap.TextWrapper):
+    """
+    Extend TextWrapper for double-width characters.
+
+    Some Asian characters use two terminal columns instead of one.
+    A good example of this behavior can be seen with u'\u65e5\u672c',
+    the two Japanese characters for "Japan":
+    len() returns 2, but when printed to a terminal, they eat 4 columns.
+
+    (Note that this has nothing to do whatsoever with unicode
+    representation, or encoding of the underlying string)
+    """
     def __init__(self, **kwargs):
         textwrap.TextWrapper.__init__(self, **kwargs)
 
     def _cutdown(self, str, space_left):
         l = 0
         ucstr = unicode(str, encoding.encoding)
-        w = unicodedata.east_asian_width
+        colwidth = unicodedata.east_asian_width
         for i in xrange(len(ucstr)):
-            l += w(ucstr[i]) in 'WFA' and 2 or 1
+            l += colwidth(ucstr[i]) in 'WFA' and 2 or 1
             if space_left < l:
                 return (ucstr[:i].encode(encoding.encoding),
                         ucstr[i:].encode(encoding.encoding))