mercurial: comparison i18n/polib.py

equal deleted inserted replaced

-:c3b7d9c54edd
+:19fc5a986669
-# -*- coding: utf-8 -*-
 # no-check-code
+# -*- coding: utf-8 -*-
 #
 # License: MIT (see LICENSE file provided)
 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
 """
 :func:`~polib.mofile` convenience functions.
 """
 from __future__ import absolute_import
-__author__    = 'David Jean Louis <izimobil@gmail.com>'
+__author__ = 'David Jean Louis <izimobil@gmail.com>'
-__version__   = '0.6.4'
+__version__ = '1.0.7'
-__all__       = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
+__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
-'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
+'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
 import array
 import codecs
 import os
 import re
 import struct
 import sys
 import textwrap
-import types
+try:
+import io
+except ImportError:
+# replacement of io.open() for python < 2.6
+# we use codecs instead
+class io(object):
+@staticmethod
+def open(fpath, mode='r', encoding=None):
+return codecs.open(fpath, mode, encoding)
 # the default encoding to use when encoding cannot be detected
 default_encoding = 'utf-8'
+# python 2/3 compatibility helpers {{{
+if sys.version_info[:2] < (3, 0):
+PY3 = False
+text_type = unicode
+def b(s):
+return s
+def u(s):
+return unicode(s, "unicode_escape")
+else:
+PY3 = True
+text_type = str
+def b(s):
+return s.encode("latin-1")
+def u(s):
+return s
+# }}}
 # _pofile_or_mofile {{{
 def _pofile_or_mofile(f, type, **kwargs):
 """
 Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
 honor the DRY concept.
 # parse the file
 kls = type == 'pofile' and _POFileParser or _MOFileParser
 parser = kls(
 f,
 encoding=enc,
-check_for_duplicates=kwargs.get('check_for_duplicates', False)
+check_for_duplicates=kwargs.get('check_for_duplicates', False),
+klass=kwargs.get('klass')
 )
 instance = parser.parse()
 instance.wrapwidth = kwargs.get('wrapwidth', 78)
 return instance
+# }}}
+# _is_file {{{
+def _is_file(filename_or_contents):
+"""
+Safely returns the value of os.path.exists(filename_or_contents).
+Arguments:
+``filename_or_contents``
+either a filename, or a string holding the contents of some file.
+In the latter case, this function will always return False.
+"""
+try:
+return os.path.exists(filename_or_contents)
+except (ValueError, UnicodeEncodeError):
+return False
 # }}}
 # function pofile() {{{
 def pofile(pofile, **kwargs):
 """
 Convenience function that parses the po or pot file ``pofile`` and returns
 a :class:`~polib.POFile` instance.
 encoding will be auto-detected).
 ``check_for_duplicates``
 whether to check for duplicate entries when adding entries to the
 file (optional, default: ``False``).
+``klass``
+class which is used to instantiate the return value (optional,
+default: ``None``, the return value will be a :class:`~polib.POFile`
+instance).
 """
 return _pofile_or_mofile(pofile, 'pofile', **kwargs)
 # }}}
 # function mofile() {{{
 def mofile(mofile, **kwargs):
 """
 Convenience function that parses the mo file ``mofile`` and returns a
 :class:`~polib.MOFile` instance.
 encoding will be auto-detected).
 ``check_for_duplicates``
 whether to check for duplicate entries when adding entries to the
 file (optional, default: ``False``).
+``klass``
+class which is used to instantiate the return value (optional,
+default: ``None``, the return value will be a :class:`~polib.MOFile`
+instance).
 """
 return _pofile_or_mofile(mofile, 'mofile', **kwargs)
 # }}}
 # function detect_encoding() {{{
 def detect_encoding(file, binary_mode=False):
 """
 Try to detect the encoding used by the ``file``. The ``file`` argument can
 be a PO or MO file path or a string containing the contents of the file.
 string, full or relative path to the po/mo file or its content.
 ``binary_mode``
 boolean, set this to True if ``file`` is a mo file.
 """
-rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
+PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
+rxt = re.compile(u(PATTERN))
+rxb = re.compile(b(PATTERN))
 def charset_exists(charset):
 """Check whether ``charset`` is valid or not."""
 try:
 codecs.lookup(charset)
 except LookupError:
 return False
 return True
-if not os.path.exists(file):
+if not _is_file(file):
-match = rx.search(file)
+match = rxt.search(file)
 if match:
 enc = match.group(1).strip()
 if charset_exists(enc):
 return enc
 else:
-if binary_mode:
+# For PY3, always treat as binary
+if binary_mode or PY3:
 mode = 'rb'
+rx = rxb
 else:
 mode = 'r'
+rx = rxt
 f = open(file, mode)
 for l in f.readlines():
 match = rx.search(l)
 if match:
 f.close()
 enc = match.group(1).strip()
+if not isinstance(enc, text_type):
+enc = enc.decode('utf-8')
 if charset_exists(enc):
 return enc
 f.close()
 return default_encoding
 # }}}
 # function escape() {{{
 def escape(st):
 """
 Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
 the given string ``st`` and returns it.
 return st.replace('\\', r'\\')\
 .replace('\t', r'\t')\
 .replace('\r', r'\r')\
 .replace('\n', r'\n')\
 .replace('\"', r'\"')
 # }}}
 # function unescape() {{{
 def unescape(st):
 """
 Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
 the given string ``st`` and returns it.
 return '\t'
 if m == 'r':
 return '\r'
 if m == '\\':
 return '\\'
-return m # handles escaped double quote
+return m  # handles escaped double quote
 return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
 # }}}
 # class _BaseFile {{{
 class _BaseFile(list):
 """
 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
 classes. This class should **not** be instantiated directly.
 file, (optional, default: ``False``).
 """
 list.__init__(self)
 # the opened file handle
 pofile = kwargs.get('pofile', None)
-if pofile and os.path.exists(pofile):
+if pofile and _is_file(pofile):
 self.fpath = pofile
 else:
 self.fpath = kwargs.get('fpath')
 # the width at which lines should be wrapped
 self.wrapwidth = kwargs.get('wrapwidth', 78)
 [e for e in self if not e.obsolete]
 for entry in entries:
 ret.append(entry.__unicode__(self.wrapwidth))
 for entry in self.obsolete_entries():
 ret.append(entry.__unicode__(self.wrapwidth))
-ret = '\n'.join(ret)
+ret = u('\n').join(ret)
-if type(ret) != types.UnicodeType:
+assert isinstance(ret, text_type)
-return unicode(ret, self.encoding)
+#if type(ret) != text_type:
+#    return unicode(ret, self.encoding)
 return ret
-def __str__(self):
+if PY3:
-"""
+def __str__(self):
-Returns the string representation of the file.
+return self.__unicode__()
-"""
+else:
-return unicode(self).encode(self.encoding)
+def __str__(self):
+"""
+Returns the string representation of the file.
+"""
+return unicode(self).encode(self.encoding)
 def __contains__(self, entry):
 """
-Overriden ``list`` method to implement the membership test (in and
+Overridden ``list`` method to implement the membership test (in and
 not in).
 The method considers that an entry is in the file if it finds an entry
-that has the same msgid (the test is **case sensitive**).
+that has the same msgid (the test is **case sensitive**) and the same
+msgctxt (or none for both entries).
 Argument:
 ``entry``
 an instance of :class:`~polib._BaseEntry`.
 """
-return self.find(entry.msgid, by='msgid') is not None
+return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \
+is not None
 def __eq__(self, other):
-return unicode(self) == unicode(other)
+return str(self) == str(other)
 def append(self, entry):
 """
-Overriden method to check for duplicates entries, if a user tries to
+Overridden method to check for duplicates entries, if a user tries to
 add an entry that is already in the file, the method will raise a
 ``ValueError`` exception.
 Argument:
 raise ValueError('Entry "%s" already exists' % entry.msgid)
 super(_BaseFile, self).append(entry)
 def insert(self, index, entry):
 """
-Overriden method to check for duplicates entries, if a user tries to
+Overridden method to check for duplicates entries, if a user tries to
 add an entry that is already in the file, the method will raise a
 ``ValueError`` exception.
 Arguments:
 e.msgstr = '\n'.join(strs) + '\n'
 if self.metadata_is_fuzzy:
 e.flags.append('fuzzy')
 return e
-def save(self, fpath=None, repr_method='__str__'):
+def save(self, fpath=None, repr_method='__unicode__'):
 """
 Saves the po file to ``fpath``.
 If it is an existing file and no ``fpath`` is provided, then the
 existing file is rewritten with the modified data.
 if fpath is None:
 fpath = self.fpath
 if repr_method == 'to_binary':
 fhandle = open(fpath, 'wb')
 else:
-fhandle = codecs.open(fpath, 'w', self.encoding)
+fhandle = io.open(fpath, 'w', encoding=self.encoding)
-if type(contents) != types.UnicodeType:
+if not isinstance(contents, text_type):
 contents = contents.decode(self.encoding)
 fhandle.write(contents)
 fhandle.close()
 # set the file path if not set
 if self.fpath is None and fpath:
 ``include_obsolete_entries``
 boolean, whether to also search in entries that are obsolete.
 ``msgctxt``
-string, allows to specify a specific message context for the
+string, allows specifying a specific message context for the
 search.
 """
 if include_obsolete_entries:
 entries = self[:]
 else:
 entries = [e for e in self if not e.obsolete]
 for e in entries:
 if getattr(e, by) == st:
-if msgctxt and e.msgctxt != msgctxt:
+if msgctxt is not False and e.msgctxt != msgctxt:
 continue
 return e
 return None
 def ordered_metadata(self):
 'PO-Revision-Date',
 'Last-Translator',
 'Language-Team',
 'MIME-Version',
 'Content-Type',
-'Content-Transfer-Encoding'
+'Content-Transfer-Encoding',
+'Language',
+'Plural-Forms'
 ]
 ordered_data = []
 for data in data_order:
 try:
 value = metadata.pop(data)
 ordered_data.append((data, value))
 except KeyError:
 pass
 # the rest of the metadata will be alphabetically ordered since there
 # are no specs for this AFAIK
-keys = metadata.keys()
+for data in sorted(metadata.keys()):
-keys.sort()
-for data in keys:
 value = metadata[data]
 ordered_data.append((data, value))
 return ordered_data
 def to_binary(self):
 """
 Return the binary representation of the file.
 """
 offsets = []
 entries = self.translated_entries()
 # the keys are sorted in the .mo file
 def cmp(_self, other):
 # msgfmt compares entries with msgctxt if it exists
-if _self.msgctxt:
+self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
-self_msgid = _self.msgctxt
+other_msgid = other.msgctxt and other.msgctxt or other.msgid
-else:
-self_msgid = _self.msgid
-if other.msgctxt:
-other_msgid = other.msgctxt
-else:
-other_msgid = other.msgid
 if self_msgid > other_msgid:
 return 1
 elif self_msgid < other_msgid:
 return -1
 else:
 return 0
 # add metadata entry
-entries.sort(cmp)
+entries.sort(key=lambda o: o.msgctxt or o.msgid)
 mentry = self.metadata_as_entry()
 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
 entries = [mentry] + entries
 entries_len = len(entries)
-ids, strs = '', ''
+ids, strs = b(''), b('')
 for e in entries:
 # For each string, we need size and file offset.  Each string is
 # NUL terminated; the NUL does not count into the size.
-msgid = ''
+msgid = b('')
 if e.msgctxt:
 # Contexts are stored by storing the concatenation of the
 # context, a <EOT> byte, and the original string
 msgid = self._encode(e.msgctxt + '\4')
 if e.msgid_plural:
-indexes = e.msgstr_plural.keys()
-indexes.sort()
 msgstr = []
-for index in indexes:
+for index in sorted(e.msgstr_plural.keys()):
 msgstr.append(e.msgstr_plural[index])
 msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
 msgstr = self._encode('\0'.join(msgstr))
 else:
 msgid += self._encode(e.msgid)
 msgstr = self._encode(e.msgstr)
 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
-ids  += msgid  + '\0'
+ids += msgid + b('\0')
-strs += msgstr + '\0'
+strs += msgstr + b('\0')
 # The header is 7 32-bit unsigned integers.
-keystart = 7*4+16*entries_len
+keystart = 7 * 4 + 16 * entries_len
 # and the values start after the keys
 valuestart = keystart + len(ids)
 koffsets = []
 voffsets = []
 # The string table first has the list of keys, then the list of values.
 # Each entry has first the size of the string, then the file offset.
 for o1, l1, o2, l2 in offsets:
-koffsets += [l1, o1+keystart]
+koffsets += [l1, o1 + keystart]
-voffsets += [l2, o2+valuestart]
+voffsets += [l2, o2 + valuestart]
 offsets = koffsets + voffsets
-# check endianness for magic number
-if struct.pack('@h', 1) == struct.pack('<h', 1):
-magic_number = MOFile.LITTLE_ENDIAN
-else:
-magic_number = MOFile.BIG_ENDIAN
 output = struct.pack(
 "Iiiiiii",
-magic_number,      # Magic number
+# Magic number
-0,                 # Version
+MOFile.MAGIC,
-entries_len,       # # of entries
+# Version
-7*4,               # start of key index
+0,
-7*4+entries_len*8, # start of value index
+# number of entries
-0, keystart        # size and offset of hash table
+entries_len,
-# Important: we don't use hash tables
+# start of key index
+7 * 4,
+# start of value index
+7 * 4 + entries_len * 8,
+# size and offset of hash table, we don't use hash tables
+0, keystart
 )
-output += array.array("i", offsets).tostring()
+if PY3 and sys.version_info.minor > 1:  # python 3.2 or superior
+output += array.array("i", offsets).tobytes()
+else:
+output += array.array("i", offsets).tostring()
 output += ids
 output += strs
 return output
 def _encode(self, mixed):
 """
 Encodes the given ``mixed`` argument with the file encoding if and
 only if it's a unicode string and returns the encoded string.
 """
-if type(mixed) == types.UnicodeType:
+if isinstance(mixed, text_type):
-return mixed.encode(self.encoding)
+mixed = mixed.encode(self.encoding)
 return mixed
 # }}}
 # class POFile {{{
 class POFile(_BaseFile):
 """
 Po (or Pot) file reader/writer.
 This class inherits the :class:`~polib._BaseFile` class and, by extension,
 """
 Returns the unicode representation of the po file.
 """
 ret, headers = '', self.header.split('\n')
 for header in headers:
-if header[:1] in [',', ':']:
+if not len(header):
+ret += "#\n"
+elif header[:1] in [',', ':']:
 ret += '#%s\n' % header
 else:
 ret += '# %s\n' % header
-if type(ret) != types.UnicodeType:
+if not isinstance(ret, text_type):
-ret = unicode(ret, self.encoding)
+ret = ret.decode(self.encoding)
 return ret + _BaseFile.__unicode__(self)
 def save_as_mofile(self, fpath):
 """
 """
 total = len([e for e in self if not e.obsolete])
 if total == 0:
 return 100
 translated = len(self.translated_entries())
-return int((100.00 / float(total)) * translated)
+return int(translated * 100 / float(total))
 def translated_entries(self):
 """
 Convenience method that returns the list of translated entries.
 """
 def untranslated_entries(self):
 """
 Convenience method that returns the list of untranslated entries.
 """
-return [e for e in self if not e.translated() and not e.obsolete \
+return [e for e in self if not e.translated() and not e.obsolete
 and not 'fuzzy' in e.flags]
 def fuzzy_entries(self):
 """
 Convenience method that returns the list of fuzzy entries.
 Keyword argument:
 ``refpot``
 object POFile, the reference catalog.
 """
+# Store entries in dict/set for faster access
+self_entries = dict((entry.msgid, entry) for entry in self)
+refpot_msgids = set(entry.msgid for entry in refpot)
+# Merge entries that are in the refpot
 for entry in refpot:
-e = self.find(entry.msgid, include_obsolete_entries=True)
+e = self_entries.get(entry.msgid)
 if e is None:
 e = POEntry()
 self.append(e)
 e.merge(entry)
 # ok, now we must "obsolete" entries that are not in the refpot anymore
 for entry in self:
-if refpot.find(entry.msgid) is None:
+if entry.msgid not in refpot_msgids:
 entry.obsolete = True
 # }}}
 # class MOFile {{{
 class MOFile(_BaseFile):
 """
 Mo file reader/writer.
 This class inherits the :class:`~polib._BaseFile` class and, by
 extension, the python ``list`` type.
 """
-BIG_ENDIAN    = 0xde120495
+MAGIC = 0x950412de
-LITTLE_ENDIAN = 0x950412de
+MAGIC_SWAPPED = 0xde120495
 def __init__(self, *args, **kwargs):
 """
 Constructor, accepts all keywords arguments accepted by
 :class:`~polib._BaseFile` class.
 def obsolete_entries(self):
 """
 Convenience method to keep the same interface with POFile instances.
 """
 return []
 # }}}
 # class _BaseEntry {{{
 class _BaseEntry(object):
 """
 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
 This class should **not** be instantiated directly.
 else:
 delflag = ''
 ret = []
 # write the msgctxt if any
 if self.msgctxt is not None:
-ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
+ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
+wrapwidth)
 # write the msgid
 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
 # write the msgid_plural if any
 if self.msgid_plural:
-ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
+ret += self._str_field("msgid_plural", delflag, "",
+self.msgid_plural, wrapwidth)
 if self.msgstr_plural:
 # write the msgstr_plural if any
 msgstrs = self.msgstr_plural
 keys = list(msgstrs)
 keys.sort()
 for index in keys:
 msgstr = msgstrs[index]
 plural_index = '[%s]' % index
-ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
+ret += self._str_field("msgstr", delflag, plural_index, msgstr,
+wrapwidth)
 else:
 # otherwise write the msgstr
-ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
+ret += self._str_field("msgstr", delflag, "", self.msgstr,
+wrapwidth)
 ret.append('')
-ret = '\n'.join(ret)
+ret = u('\n').join(ret)
-if type(ret) != types.UnicodeType:
-return unicode(ret, self.encoding)
 return ret
-def __str__(self):
+if PY3:
-"""
+def __str__(self):
-Returns the string representation of the entry.
+return self.__unicode__()
-"""
+else:
-return unicode(self).encode(self.encoding)
+def __str__(self):
+"""
+Returns the string representation of the entry.
+"""
+return unicode(self).encode(self.encoding)
 def __eq__(self, other):
-return unicode(self) == unicode(other)
+return str(self) == str(other)
-def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
+def _str_field(self, fieldname, delflag, plural_index, field,
+wrapwidth=78):
 lines = field.splitlines(True)
 if len(lines) > 1:
-lines = [''] + lines # start with initial empty line
+lines = [''] + lines  # start with initial empty line
 else:
 escaped_field = escape(field)
 specialchars_count = 0
 for c in ['\\', '\n', '\r', '\t', '"']:
 specialchars_count += field.count(c)
 if plural_index:
 flength += len(plural_index)
 real_wrapwidth = wrapwidth - flength + specialchars_count
 if wrapwidth > 0 and len(field) > real_wrapwidth:
 # Wrap the line but take field name into account
-lines = [''] + [unescape(item) for item in textwrap.wrap(
+lines = [''] + [unescape(item) for item in wrap(
 escaped_field,
-wrapwidth - 2, # 2 for quotes ""
+wrapwidth - 2,  # 2 for quotes ""
 drop_whitespace=False,
 break_long_words=False
 )]
 else:
 lines = [field]
 # quick and dirty trick to get the real field name
 fieldname = fieldname[9:]
 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
 escape(lines.pop(0)))]
-for mstr in lines:
+for line in lines:
-ret.append('%s"%s"' % (delflag, escape(mstr)))
+ret.append('%s"%s"' % (delflag, escape(line)))
 return ret
 # }}}
 # class POEntry {{{
 class POEntry(_BaseEntry):
 """
 Represents a po file entry.
 """
 ``previous_msgid``
 string, the entry previous msgid.
 ``previous_msgid_plural``
 string, the entry previous msgid_plural.
+``linenum``
+integer, the line number of the entry
 """
 _BaseEntry.__init__(self, *args, **kwargs)
 self.comment = kwargs.get('comment', '')
 self.tcomment = kwargs.get('tcomment', '')
 self.occurrences = kwargs.get('occurrences', [])
 self.flags = kwargs.get('flags', [])
 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
 self.previous_msgid = kwargs.get('previous_msgid', None)
 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
+self.linenum = kwargs.get('linenum', None)
 def __unicode__(self, wrapwidth=78):
 """
 Returns the unicode representation of the entry.
 """
 for c in comments:
 val = getattr(self, c[0])
 if val:
 for comment in val.split('\n'):
 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
-ret += textwrap.wrap(
+ret += wrap(
 comment,
 wrapwidth,
 initial_indent=c[1],
 subsequent_indent=c[1],
 break_long_words=False
 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
 # textwrap splits words that contain hyphens, which is not
 # what we want for filenames, so the dirty hack is to
 # temporarily replace hyphens with a char that a file cannot
 # contain, like "*"
-ret += [l.replace('*', '-') for l in textwrap.wrap(
+ret += [l.replace('*', '-') for l in wrap(
 filestr.replace('-', '*'),
 wrapwidth,
 initial_indent='#: ',
 subsequent_indent='#: ',
 break_long_words=False
 # flags (TODO: wrapping ?)
 if self.flags:
 ret.append('#, %s' % ', '.join(self.flags))
 # previous context and previous msgid/msgid_plural
-fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
+fields = ['previous_msgctxt', 'previous_msgid',
+'previous_msgid_plural']
 for f in fields:
 val = getattr(self, f)
 if val:
 ret += self._str_field(f, "#| ", "", val, wrapwidth)
 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
-ret = '\n'.join(ret)
+ret = u('\n').join(ret)
-if type(ret) != types.UnicodeType:
+assert isinstance(ret, text_type)
-return unicode(ret, self.encoding)
+#if type(ret) != types.UnicodeType:
+#    return unicode(ret, self.encoding)
 return ret
 def __cmp__(self, other):
 """
 Called by comparison operations if rich comparison is not defined.
 """
-def compare_occurrences(a, b):
-"""
-Compare an entry occurrence with another one.
-"""
-if a[0] != b[0]:
-return a[0] < b[0]
-if a[1] != b[1]:
-return a[1] < b[1]
-return 0
 # First: Obsolete test
 if self.obsolete != other.obsolete:
 if self.obsolete:
 return -1
 else:
 return 1
 # Work on a copy to protect original
-occ1 = self.occurrences[:]
+occ1 = sorted(self.occurrences[:])
-occ2 = other.occurrences[:]
+occ2 = sorted(other.occurrences[:])
-# Sorting using compare method
-occ1.sort(compare_occurrences)
-occ2.sort(compare_occurrences)
-# Comparing sorted occurrences
 pos = 0
 for entry1 in occ1:
 try:
 entry2 = occ2[pos]
 except IndexError:
 if entry1[1] != entry2[1]:
 if entry1[1] > entry2[1]:
 return 1
 else:
 return -1
+# Compare msgid_plural if set
+if self.msgid_plural:
+if not other.msgid_plural:
+return 1
+for pos in self.msgid_plural:
+if pos not in other.msgid_plural:
+return 1
+if self.msgid_plural[pos] > other.msgid_plural[pos]:
+return 1
+if self.msgid_plural[pos] < other.msgid_plural[pos]:
+return -1
 # Finally: Compare message ID
-if self.msgid > other.msgid: return 1
+if self.msgid > other.msgid:
-else: return -1
+return 1
+elif self.msgid < other.msgid:
+return -1
+return 0
+def __gt__(self, other):
+return self.__cmp__(other) > 0
+def __lt__(self, other):
+return self.__cmp__(other) < 0
+def __ge__(self, other):
+return self.__cmp__(other) >= 0
+def __le__(self, other):
+return self.__cmp__(other) <= 0
+def __eq__(self, other):
+return self.__cmp__(other) == 0
+def __ne__(self, other):
+return self.__cmp__(other) != 0
 def translated(self):
 """
 Returns ``True`` if the entry has been translated or ``False``
 otherwise.
 # keep existing translation at pos if any
 self.msgstr_plural[pos]
 except KeyError:
 self.msgstr_plural[pos] = ''
+def __hash__(self):
+return hash((self.msgid, self.msgstr))
 # }}}
 # class MOEntry {{{
 class MOEntry(_BaseEntry):
 """
 Represents a mo file entry.
 """
-pass
+def __init__(self, *args, **kwargs):
+"""
+Constructor, accepts the following keyword arguments,
+for consistency with :class:`~polib.POEntry`:
+``comment``
+``tcomment``
+``occurrences``
+``flags``
+``previous_msgctxt``
+``previous_msgid``
+``previous_msgid_plural``
+Note: even though these keyword arguments are accepted,
+they hold no real meaning in the context of MO files
+and are simply ignored.
+"""
+_BaseEntry.__init__(self, *args, **kwargs)
+self.comment = ''
+self.tcomment = ''
+self.occurrences = []
+self.flags = []
+self.previous_msgctxt = None
+self.previous_msgid = None
+self.previous_msgid_plural = None
+def __hash__(self):
+return hash((self.msgid, self.msgstr))
 # }}}
 # class _POFileParser {{{
 class _POFileParser(object):
 """
 A finite state machine that parses the po file format efficiently
 and correctly.
 ``check_for_duplicates``
 whether to check for duplicate entries when adding entries to the
 file (optional, default: ``False``).
 """
 enc = kwargs.get('encoding', default_encoding)
-if os.path.exists(pofile):
+if _is_file(pofile):
 try:
-self.fhandle = codecs.open(pofile, 'rU', enc)
+self.fhandle = io.open(pofile, 'rt', encoding=enc)
 except LookupError:
 enc = default_encoding
-self.fhandle = codecs.open(pofile, 'rU', enc)
+self.fhandle = io.open(pofile, 'rt', encoding=enc)
 else:
 self.fhandle = pofile.splitlines()
-self.instance = POFile(
+klass = kwargs.get('klass')
+if klass is None:
+klass = POFile
+self.instance = klass(
 pofile=pofile,
 encoding=enc,
 check_for_duplicates=kwargs.get('check_for_duplicates', False)
 )
 self.transitions = {}
-self.current_entry = POEntry()
+self.current_line = 0
-self.current_state = 'ST'
+self.current_entry = POEntry(linenum=self.current_line)
+self.current_state = 'st'
 self.current_token = None
 # two memo flags used in handlers
 self.msgstr_index = 0
 self.entry_obsolete = 0
 # Configure the state machine, by adding transitions.
 # Meaning of symbols:
 #     * ST: Beginning of the file (start)
 #     * HE: Header
 #     * TC: a translation comment
 #     * GC: a generated comment
-#     * OC: a file/line occurence
+#     * OC: a file/line occurrence
 #     * FL: a flags line
 #     * CT: a message context
 #     * PC: a previous msgctxt
 #     * PM: a previous msgid
 #     * PP: a previous msgid_plural
 #     * MI: a msgid
 #     * MP: a msgid plural
 #     * MS: a msgstr
 #     * MX: a msgstr plural
 #     * MC: a msgid or msgstr continuation line
-all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
+all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc',
-'MS', 'MP', 'MX', 'MI']
+'ms', 'mp', 'mx', 'mi']
-self.add('TC', ['ST', 'HE'],                                     'HE')
+self.add('tc', ['st', 'he'],                                     'he')
-self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
+self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
-'MP', 'MX', 'MI'],                               'TC')
+'mp', 'mx', 'mi'],                               'tc')
-self.add('GC', all,                                              'GC')
+self.add('gc', all,                                              'gc')
-self.add('OC', all,                                              'OC')
+self.add('oc', all,                                              'oc')
-self.add('FL', all,                                              'FL')
+self.add('fl', all,                                              'fl')
-self.add('PC', all,                                              'PC')
+self.add('pc', all,                                              'pc')
-self.add('PM', all,                                              'PM')
+self.add('pm', all,                                              'pm')
-self.add('PP', all,                                              'PP')
+self.add('pp', all,                                              'pp')
-self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
+self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
-'PP', 'MS', 'MX'],                               'CT')
+'pp', 'ms', 'mx'],                               'ct')
-self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
+self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
-'PM', 'PP', 'MS', 'MX'],                                'MI')
+'pm', 'pp', 'ms', 'mx'],                                'mi')
-self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'],             'MP')
+self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'],             'mp')
-self.add('MS', ['MI', 'MP', 'TC'],                               'MS')
+self.add('ms', ['mi', 'mp', 'tc'],                               'ms')
-self.add('MX', ['MI', 'MX', 'MP', 'TC'],                         'MX')
+self.add('mx', ['mi', 'mx', 'mp', 'tc'],                         'mx')
-self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
+self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
 def parse(self):
 """
 Run the state machine, parse the file line by line and call process()
 with the current matched symbol.
 """
-i = 0
 keywords = {
-'msgctxt': 'CT',
+'msgctxt': 'ct',
-'msgid': 'MI',
+'msgid': 'mi',
-'msgstr': 'MS',
+'msgstr': 'ms',
-'msgid_plural': 'MP',
+'msgid_plural': 'mp',
 }
 prev_keywords = {
-'msgid_plural': 'PP',
+'msgid_plural': 'pp',
-'msgid': 'PM',
+'msgid': 'pm',
-'msgctxt': 'PC',
+'msgctxt': 'pc',
 }
+tokens = []
 for line in self.fhandle:
-i += 1
+self.current_line += 1
 line = line.strip()
 if line == '':
 continue
 tokens = line.split(None, 2)
 nb_tokens = len(tokens)
+if tokens[0] == '#~|':
+continue
 if tokens[0] == '#~' and nb_tokens > 1:
 line = line[3:].strip()
 tokens = tokens[1:]
 nb_tokens -= 1
 # Take care of keywords like
 # msgid, msgid_plural, msgctxt & msgstr.
 if tokens[0] in keywords and nb_tokens > 1:
 line = line[len(tokens[0]):].lstrip()
+if re.search(r'([^\\]|^)"', line[1:-1]):
+raise IOError('Syntax error in po file %s (line %s): '
+'unescaped double quote found' %
+(self.instance.fpath, self.current_line))
 self.current_token = line
-self.process(keywords[tokens[0]], i)
+self.process(keywords[tokens[0]])
 continue
 self.current_token = line
-if tokens[0] == '#:' and nb_tokens > 1:
+if tokens[0] == '#:':
+if nb_tokens <= 1:
+continue
 # we are on an occurrences line
-self.process('OC', i)
+self.process('oc')
 elif line[:1] == '"':
 # we are on a continuation line
-self.process('MC', i)
+if re.search(r'([^\\]|^)"', line[1:-1]):
+raise IOError('Syntax error in po file %s (line %s): '
+'unescaped double quote found' %
+(self.instance.fpath, self.current_line))
+self.process('mc')
 elif line[:7] == 'msgstr[':
 # we are on a msgstr plural
-self.process('MX', i)
+self.process('mx')
-elif tokens[0] == '#,' and nb_tokens > 1:
+elif tokens[0] == '#,':
+if nb_tokens <= 1:
+continue
 # we are on a flags line
-self.process('FL', i)
+self.process('fl')
-elif tokens[0] == '#':
+elif tokens[0] == '#' or tokens[0].startswith('##'):
-if line == '#': line += ' '
+if line == '#':
+line += ' '
 # we are on a translator comment line
-self.process('TC', i)
+self.process('tc')
-elif tokens[0] == '#.' and nb_tokens > 1:
+elif tokens[0] == '#.':
+if nb_tokens <= 1:
+continue
 # we are on a generated comment line
-self.process('GC', i)
+self.process('gc')
 elif tokens[0] == '#|':
-if nb_tokens < 2:
+if nb_tokens <= 1:
-self.process('??', i)
+raise IOError('Syntax error in po file %s (line %s)' %
-continue
+(self.instance.fpath, self.current_line))
 # Remove the marker and any whitespace right after that.
 line = line[2:].lstrip()
 self.current_token = line
 if tokens[1].startswith('"'):
 # Continuation of previous metadata.
-self.process('MC', i)
+self.process('mc')
 continue
 if nb_tokens == 2:
 # Invalid continuation line.
-self.process('??', i)
+raise IOError('Syntax error in po file %s (line %s): '
+'invalid continuation line' %
+(self.instance.fpath, self.current_line))
 # we are on a "previous translation" comment line,
 if tokens[1] not in prev_keywords:
 # Unknown keyword in previous translation comment.
-self.process('??', i)
+raise IOError('Syntax error in po file %s (line %s): '
+'unknown keyword %s' %
+(self.instance.fpath, self.current_line,
+tokens[1]))
 # Remove the keyword and any whitespace
 # between it and the starting quote.
 line = line[len(tokens[1]):].lstrip()
 self.current_token = line
-self.process(prev_keywords[tokens[1]], i)
+self.process(prev_keywords[tokens[1]])
 else:
-self.process('??', i)
+raise IOError('Syntax error in po file %s (line %s)' %
+(self.instance.fpath, self.current_line))
-if self.current_entry:
+if self.current_entry and len(tokens) > 0 and \
+not tokens[0].startswith('#'):
 # since entries are added when another entry is found, we must add
-# the last entry here (only if there are lines)
+# the last entry here (only if there are lines). Trailing comments
+# are ignored
 self.instance.append(self.current_entry)
 # before returning the instance, check if there's metadata and if
 # so extract it in a dict
-firstentry = self.instance[0]
+metadataentry = self.instance.find('')
-if firstentry.msgid == '': # metadata found
+if metadataentry:  # metadata found
 # remove the entry
-firstentry = self.instance.pop(0)
+self.instance.remove(metadataentry)
-self.instance.metadata_is_fuzzy = firstentry.flags
+self.instance.metadata_is_fuzzy = metadataentry.flags
 key = None
-for msg in firstentry.msgstr.splitlines():
+for msg in metadataentry.msgstr.splitlines():
 try:
 key, val = msg.split(':', 1)
 self.instance.metadata[key] = val.strip()
-except:
+except (ValueError, KeyError):
 if key is not None:
-self.instance.metadata[key] += '\n'+ msg.strip()
+self.instance.metadata[key] += '\n' + msg.strip()
 # close opened file
-if isinstance(self.fhandle, file):
+if not isinstance(self.fhandle, list):  # must be file
 self.fhandle.close()
 return self.instance
 def add(self, symbol, states, next_state):
 """
 ``next_state``
 the next state the fsm will have after the action.
 """
 for state in states:
-action = getattr(self, 'handle_%s' % next_state.lower())
+action = getattr(self, 'handle_%s' % next_state)
 self.transitions[(symbol, state)] = (action, next_state)
-def process(self, symbol, linenum):
+def process(self, symbol):
 """
 Process the transition corresponding to the current state and the
 symbol provided.
 Keywords arguments:
 """
 try:
 (action, state) = self.transitions[(symbol, self.current_state)]
 if action():
 self.current_state = state
-except Exception as exc:
+except Exception:
-raise IOError('Syntax error in po file (line %s)' % linenum)
+raise IOError('Syntax error in po file (line %s)' %
+self.current_line)
 # state handlers
 def handle_he(self):
 """Handle a header comment."""
 self.instance.header += self.current_token[2:]
 return 1
 def handle_tc(self):
 """Handle a translator comment."""
-if self.current_state in ['MC', 'MS', 'MX']:
+if self.current_state in ['mc', 'ms', 'mx']:
 self.instance.append(self.current_entry)
-self.current_entry = POEntry()
+self.current_entry = POEntry(linenum=self.current_line)
 if self.current_entry.tcomment != '':
 self.current_entry.tcomment += '\n'
-self.current_entry.tcomment += self.current_token[2:]
+tcomment = self.current_token.lstrip('#')
+if tcomment.startswith(' '):
+tcomment = tcomment[1:]
+self.current_entry.tcomment += tcomment
 return True
 def handle_gc(self):
 """Handle a generated comment."""
-if self.current_state in ['MC', 'MS', 'MX']:
+if self.current_state in ['mc', 'ms', 'mx']:
 self.instance.append(self.current_entry)
-self.current_entry = POEntry()
+self.current_entry = POEntry(linenum=self.current_line)
 if self.current_entry.comment != '':
 self.current_entry.comment += '\n'
 self.current_entry.comment += self.current_token[3:]
 return True
 def handle_oc(self):
-"""Handle a file:num occurence."""
+"""Handle a file:num occurrence."""
-if self.current_state in ['MC', 'MS', 'MX']:
+if self.current_state in ['mc', 'ms', 'mx']:
 self.instance.append(self.current_entry)
-self.current_entry = POEntry()
+self.current_entry = POEntry(linenum=self.current_line)
 occurrences = self.current_token[3:].split()
 for occurrence in occurrences:
 if occurrence != '':
 try:
 fil, line = occurrence.split(':')
 if not line.isdigit():
-fil  = fil + line
+fil = fil + line
 line = ''
 self.current_entry.occurrences.append((fil, line))
-except:
+except (ValueError, AttributeError):
 self.current_entry.occurrences.append((occurrence, ''))
 return True
 def handle_fl(self):
 """Handle a flags line."""
-if self.current_state in ['MC', 'MS', 'MX']:
+if self.current_state in ['mc', 'ms', 'mx']:
 self.instance.append(self.current_entry)
-self.current_entry = POEntry()
+self.current_entry = POEntry(linenum=self.current_line)
-self.current_entry.flags += self.current_token[3:].split(', ')
+self.current_entry.flags += [c.strip() for c in
+self.current_token[3:].split(',')]
 return True
 def handle_pp(self):
 """Handle a previous msgid_plural line."""
-if self.current_state in ['MC', 'MS', 'MX']:
+if self.current_state in ['mc', 'ms', 'mx']:
 self.instance.append(self.current_entry)
-self.current_entry = POEntry()
+self.current_entry = POEntry(linenum=self.current_line)
 self.current_entry.previous_msgid_plural = \
 unescape(self.current_token[1:-1])
 return True
 def handle_pm(self):
 """Handle a previous msgid line."""
-if self.current_state in ['MC', 'MS', 'MX']:
+if self.current_state in ['mc', 'ms', 'mx']:
 self.instance.append(self.current_entry)
-self.current_entry = POEntry()
+self.current_entry = POEntry(linenum=self.current_line)
 self.current_entry.previous_msgid = \
 unescape(self.current_token[1:-1])
 return True
 def handle_pc(self):
 """Handle a previous msgctxt line."""
-if self.current_state in ['MC', 'MS', 'MX']:
+if self.current_state in ['mc', 'ms', 'mx']:
 self.instance.append(self.current_entry)
-self.current_entry = POEntry()
+self.current_entry = POEntry(linenum=self.current_line)
 self.current_entry.previous_msgctxt = \
 unescape(self.current_token[1:-1])
 return True
 def handle_ct(self):
 """Handle a msgctxt."""
-if self.current_state in ['MC', 'MS', 'MX']:
+if self.current_state in ['mc', 'ms', 'mx']:
 self.instance.append(self.current_entry)
-self.current_entry = POEntry()
+self.current_entry = POEntry(linenum=self.current_line)
 self.current_entry.msgctxt = unescape(self.current_token[1:-1])
 return True
 def handle_mi(self):
 """Handle a msgid."""
-if self.current_state in ['MC', 'MS', 'MX']:
+if self.current_state in ['mc', 'ms', 'mx']:
 self.instance.append(self.current_entry)
-self.current_entry = POEntry()
+self.current_entry = POEntry(linenum=self.current_line)
 self.current_entry.obsolete = self.entry_obsolete
 self.current_entry.msgid = unescape(self.current_token[1:-1])
 return True
 def handle_mp(self):
 self.current_entry.msgstr = unescape(self.current_token[1:-1])
 return True
 def handle_mx(self):
 """Handle a msgstr plural."""
-index, value = self.current_token[7], self.current_token[11:-1]
+index = self.current_token[7]
-self.current_entry.msgstr_plural[index] = unescape(value)
+value = self.current_token[self.current_token.find('"') + 1:-1]
-self.msgstr_index = index
+self.current_entry.msgstr_plural[int(index)] = unescape(value)
+self.msgstr_index = int(index)
 return True
 def handle_mc(self):
 """Handle a msgid or msgstr continuation line."""
 token = unescape(self.current_token[1:-1])
-if self.current_state == 'CT':
+if self.current_state == 'ct':
-typ = 'msgctxt'
 self.current_entry.msgctxt += token
-elif self.current_state == 'MI':
+elif self.current_state == 'mi':
-typ = 'msgid'
 self.current_entry.msgid += token
-elif self.current_state == 'MP':
+elif self.current_state == 'mp':
-typ = 'msgid_plural'
 self.current_entry.msgid_plural += token
-elif self.current_state == 'MS':
+elif self.current_state == 'ms':
-typ = 'msgstr'
 self.current_entry.msgstr += token
-elif self.current_state == 'MX':
+elif self.current_state == 'mx':
-typ = 'msgstr[%s]' % self.msgstr_index
 self.current_entry.msgstr_plural[self.msgstr_index] += token
-elif self.current_state == 'PP':
+elif self.current_state == 'pp':
-typ = 'previous_msgid_plural'
-token = token[3:]
 self.current_entry.previous_msgid_plural += token
-elif self.current_state == 'PM':
+elif self.current_state == 'pm':
-typ = 'previous_msgid'
-token = token[3:]
 self.current_entry.previous_msgid += token
-elif self.current_state == 'PC':
+elif self.current_state == 'pc':
-typ = 'previous_msgctxt'
-token = token[3:]
 self.current_entry.previous_msgctxt += token
 # don't change the current state
 return False
 # }}}
 # class _MOFileParser {{{
 class _MOFileParser(object):
 """
 A class to parse binary mo files.
 """
 ``check_for_duplicates``
 whether to check for duplicate entries when adding entries to the
 file (optional, default: ``False``).
 """
 self.fhandle = open(mofile, 'rb')
-self.instance = MOFile(
+klass = kwargs.get('klass')
+if klass is None:
+klass = MOFile
+self.instance = klass(
 fpath=mofile,
 encoding=kwargs.get('encoding', default_encoding),
 check_for_duplicates=kwargs.get('check_for_duplicates', False)
 )
+def __del__(self):
+"""
+Make sure the file is closed, this prevents warnings on unclosed file
+when running tests with python >= 3.2.
+"""
+if self.fhandle:
+self.fhandle.close()
 def parse(self):
 """
 Build the instance with the file handle provided in the
 constructor.
 """
 # parse magic number
 magic_number = self._readbinary('<I', 4)
-if magic_number == MOFile.LITTLE_ENDIAN:
+if magic_number == MOFile.MAGIC:
 ii = '<II'
-elif magic_number == MOFile.BIG_ENDIAN:
+elif magic_number == MOFile.MAGIC_SWAPPED:
 ii = '>II'
 else:
 raise IOError('Invalid mo file, magic number is incorrect !')
 self.instance.magic_number = magic_number
 # parse the version number and the number of strings
-self.instance.version, numofstrings = self._readbinary(ii, 8)
+version, numofstrings = self._readbinary(ii, 8)
+# from MO file format specs: "A program seeing an unexpected major
+# revision number should stop reading the MO file entirely"
+if version not in (0, 1):
+raise IOError('Invalid mo file, unexpected major revision number')
+self.instance.version = version
 # original strings and translation strings hash table offset
 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
 # move to msgid hash table and read length and offset of msgids
 self.fhandle.seek(msgids_hash_offset)
 msgids_index = []
 self.fhandle.seek(msgstrs_hash_offset)
 msgstrs_index = []
 for i in range(numofstrings):
 msgstrs_index.append(self._readbinary(ii, 8))
 # build entries
+encoding = self.instance.encoding
 for i in range(numofstrings):
 self.fhandle.seek(msgids_index[i][1])
 msgid = self.fhandle.read(msgids_index[i][0])
 self.fhandle.seek(msgstrs_index[i][1])
 msgstr = self.fhandle.read(msgstrs_index[i][0])
-if i == 0: # metadata
+if i == 0 and not msgid:  # metadata
-raw_metadata, metadata = msgstr.split('\n'), {}
+raw_metadata, metadata = msgstr.split(b('\n')), {}
 for line in raw_metadata:
-tokens = line.split(':', 1)
+tokens = line.split(b(':'), 1)
-if tokens[0] != '':
+if tokens[0] != b(''):
 try:
-metadata[tokens[0]] = tokens[1].strip()
+k = tokens[0].decode(encoding)
+v = tokens[1].decode(encoding)
+metadata[k] = v.strip()
 except IndexError:
-metadata[tokens[0]] = ''
+metadata[k] = u('')
 self.instance.metadata = metadata
 continue
 # test if we have a plural entry
-msgid_tokens = msgid.split('\0')
+msgid_tokens = msgid.split(b('\0'))
 if len(msgid_tokens) > 1:
 entry = self._build_entry(
 msgid=msgid_tokens[0],
 msgid_plural=msgid_tokens[1],
-msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))
+msgstr_plural=dict((k, v) for k, v in
+enumerate(msgstr.split(b('\0'))))
 )
 else:
 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
 self.instance.append(entry)
 # close opened file
 self.fhandle.close()
 return self.instance
 def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
 msgstr_plural=None):
-msgctxt_msgid = msgid.split('\x04')
+msgctxt_msgid = msgid.split(b('\x04'))
+encoding = self.instance.encoding
 if len(msgctxt_msgid) > 1:
 kwargs = {
-'msgctxt': msgctxt_msgid[0],
+'msgctxt': msgctxt_msgid[0].decode(encoding),
-'msgid'  : msgctxt_msgid[1],
+'msgid': msgctxt_msgid[1].decode(encoding),
 }
 else:
-kwargs = {'msgid': msgid}
+kwargs = {'msgid': msgid.decode(encoding)}
 if msgstr:
-kwargs['msgstr'] = msgstr
+kwargs['msgstr'] = msgstr.decode(encoding)
 if msgid_plural:
-kwargs['msgid_plural'] = msgid_plural
+kwargs['msgid_plural'] = msgid_plural.decode(encoding)
 if msgstr_plural:
+for k in msgstr_plural:
+msgstr_plural[k] = msgstr_plural[k].decode(encoding)
 kwargs['msgstr_plural'] = msgstr_plural
 return MOEntry(**kwargs)
 def _readbinary(self, fmt, numbytes):
 """
 bytes = self.fhandle.read(numbytes)
 tup = struct.unpack(fmt, bytes)
 if len(tup) == 1:
 return tup[0]
 return tup
 # }}}
+# class TextWrapper {{{
+class TextWrapper(textwrap.TextWrapper):
+"""
+Subclass of textwrap.TextWrapper that backport the
+drop_whitespace option.
+"""
+def __init__(self, *args, **kwargs):
+drop_whitespace = kwargs.pop('drop_whitespace', True)
+textwrap.TextWrapper.__init__(self, *args, **kwargs)
+self.drop_whitespace = drop_whitespace
+def _wrap_chunks(self, chunks):
+"""_wrap_chunks(chunks : [string]) -> [string]
+Wrap a sequence of text chunks and return a list of lines of
+length 'self.width' or less.  (If 'break_long_words' is false,
+some lines may be longer than this.)  Chunks correspond roughly
+to words and the whitespace between them: each chunk is
+indivisible (modulo 'break_long_words'), but a line break can
+come between any two chunks.  Chunks should not have internal
+whitespace; ie. a chunk is either all whitespace or a "word".
+Whitespace chunks will be removed from the beginning and end of
+lines, but apart from that whitespace is preserved.
+"""
+lines = []
+if self.width <= 0:
+raise ValueError("invalid width %r (must be > 0)" % self.width)
+# Arrange in reverse order so items can be efficiently popped
+# from a stack of chucks.
+chunks.reverse()
+while chunks:
+# Start the list of chunks that will make up the current line.
+# cur_len is just the length of all the chunks in cur_line.
+cur_line = []
+cur_len = 0
+# Figure out which static string will prefix this line.
+if lines:
+indent = self.subsequent_indent
+else:
+indent = self.initial_indent
+# Maximum width for this line.
+width = self.width - len(indent)
+# First chunk on line is whitespace -- drop it, unless this
+# is the very beginning of the text (ie. no lines started yet).
+if self.drop_whitespace and chunks[-1].strip() == '' and lines:
+del chunks[-1]
+while chunks:
+l = len(chunks[-1])
+# Can at least squeeze this chunk onto the current line.
+if cur_len + l <= width:
+cur_line.append(chunks.pop())
+cur_len += l
+# Nope, this line is full.
+else:
+break
+# The current line is full, and the next chunk is too big to
+# fit on *any* line (not just this one).
+if chunks and len(chunks[-1]) > width:
+self._handle_long_word(chunks, cur_line, cur_len, width)
+# If the last chunk on this line is all whitespace, drop it.
+if self.drop_whitespace and cur_line and not cur_line[-1].strip():
+del cur_line[-1]
+# Convert current line back to a string and store it in list
+# of all lines (return value).
+if cur_line:
+lines.append(indent + ''.join(cur_line))
+return lines
+# }}}
+# function wrap() {{{
+def wrap(text, width=70, **kwargs):
+"""
+Wrap a single paragraph of text, returning a list of wrapped lines.
+"""
+if sys.version_info < (2, 6):
+return TextWrapper(width=width, **kwargs).wrap(text)
+return textwrap.wrap(text, width=width, **kwargs)
+# }}}

changeset 40185	19fc5a986669
parent 32889	a7310a477966
child 43076	2372284d9457