i18n/polib.py
changeset 11387 181936ec9bfb
child 11388 db957a72fbd7
equal deleted inserted replaced
11385:e5a2134c083b 11387:181936ec9bfb
       
     1 #!/usr/bin/env python
       
     2 # -*- coding: utf-8 -*-
       
     3 #
       
     4 # License: MIT (see LICENSE file provided)
       
     5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
       
     6 
       
     7 """
       
     8 **polib** allows you to manipulate, create, modify gettext files (pot, po
       
     9 and mo files).  You can load existing files, iterate through its entries,
       
    10 add, modify entries, comments or metadata, etc... or create new po files
       
    11 from scratch.
       
    12 
       
    13 **polib** provides a simple and pythonic API, exporting only three
       
    14 convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
       
    15 four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
       
    16 new files/entries.
       
    17 
       
    18 **Basic example**:
       
    19 
       
    20 >>> import polib
       
    21 >>> # load an existing po file
       
    22 >>> po = polib.pofile('tests/test_utf8.po')
       
    23 >>> for entry in po:
       
    24 ...     # do something with entry...
       
    25 ...     pass
       
    26 >>> # add an entry
       
    27 >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
       
    28 >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
       
    29 >>> po.append(entry)
       
    30 >>> # to save our modified po file:
       
    31 >>> # po.save()
       
    32 >>> # or you may want to compile the po file
       
    33 >>> # po.save_as_mofile('tests/test_utf8.mo')
       
    34 """
       
    35 
       
    36 __author__    = 'David JEAN LOUIS <izimobil@gmail.com>'
       
    37 __version__   = '0.5.2'
       
    38 __all__       = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
       
    39                  'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
       
    40 
       
    41 import codecs
       
    42 import struct
       
    43 import textwrap
       
    44 import types
       
    45 
       
    46 default_encoding = 'utf-8'
       
    47 
       
    48 # function pofile() {{{
       
    49 
       
    50 def pofile(fpath, **kwargs):
       
    51     """
       
    52     Convenience function that parse the po/pot file *fpath* and return
       
    53     a POFile instance.
       
    54 
       
    55     **Keyword arguments**:
       
    56       - *fpath*: string, full or relative path to the po/pot file to parse
       
    57       - *wrapwidth*: integer, the wrap width, only useful when -w option was
       
    58         passed to xgettext (optional, default to 78)
       
    59       - *autodetect_encoding*: boolean, if set to False the function will
       
    60         not try to detect the po file encoding (optional, default to True)
       
    61       - *encoding*: string, an encoding, only relevant if autodetect_encoding
       
    62         is set to False
       
    63       - *check_for_duplicates*: whether to check for duplicate entries when
       
    64         adding entries to the file, default: False (optional)
       
    65 
       
    66     **Example**:
       
    67 
       
    68     >>> import polib
       
    69     >>> po = polib.pofile('tests/test_weird_occurrences.po',
       
    70     ...     check_for_duplicates=True)
       
    71     >>> po #doctest: +ELLIPSIS
       
    72     <POFile instance at ...>
       
    73     >>> import os, tempfile
       
    74     >>> all_attrs = ('msgctxt', 'msgid', 'msgstr', 'msgid_plural', 
       
    75     ...              'msgstr_plural', 'obsolete', 'comment', 'tcomment', 
       
    76     ...              'occurrences', 'flags', 'previous_msgctxt', 
       
    77     ...              'previous_msgid', 'previous_msgid_plural')
       
    78     >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
       
    79     ...     orig_po = polib.pofile('tests/'+fname)
       
    80     ...     tmpf = tempfile.NamedTemporaryFile().name
       
    81     ...     orig_po.save(tmpf)
       
    82     ...     try:
       
    83     ...         new_po = polib.pofile(tmpf)
       
    84     ...         for old, new in zip(orig_po, new_po):
       
    85     ...             for attr in all_attrs:
       
    86     ...                 if getattr(old, attr) != getattr(new, attr):
       
    87     ...                     getattr(old, attr)
       
    88     ...                     getattr(new, attr)
       
    89     ...     finally:
       
    90     ...         os.unlink(tmpf)
       
    91     >>> po_file = polib.pofile('tests/test_save_as_mofile.po')
       
    92     >>> tmpf = tempfile.NamedTemporaryFile().name
       
    93     >>> po_file.save_as_mofile(tmpf)
       
    94     >>> try:
       
    95     ...     mo_file = polib.mofile(tmpf)
       
    96     ...     for old, new in zip(po_file, mo_file):
       
    97     ...         if po_file._encode(old.msgid) != mo_file._encode(new.msgid):
       
    98     ...             'OLD: ', po_file._encode(old.msgid)
       
    99     ...             'NEW: ', mo_file._encode(new.msgid)
       
   100     ...         if po_file._encode(old.msgstr) != mo_file._encode(new.msgstr):
       
   101     ...             'OLD: ', po_file._encode(old.msgstr)
       
   102     ...             'NEW: ', mo_file._encode(new.msgstr)
       
   103     ...             print new.msgstr
       
   104     ... finally:
       
   105     ...     os.unlink(tmpf)
       
   106     """
       
   107     if kwargs.get('autodetect_encoding', True) == True:
       
   108         enc = detect_encoding(fpath)
       
   109     else:
       
   110         enc = kwargs.get('encoding', default_encoding)
       
   111     check_for_duplicates = kwargs.get('check_for_duplicates', False)
       
   112     parser = _POFileParser(
       
   113         fpath,
       
   114         encoding=enc,
       
   115         check_for_duplicates=kwargs.get('check_for_duplicates', False)
       
   116     )
       
   117     instance = parser.parse()
       
   118     instance.wrapwidth = kwargs.get('wrapwidth', 78)
       
   119     return instance
       
   120 
       
   121 # }}}
       
   122 # function mofile() {{{
       
   123 
       
   124 def mofile(fpath, **kwargs):
       
   125     """
       
   126     Convenience function that parse the mo file *fpath* and return
       
   127     a MOFile instance.
       
   128 
       
   129     **Keyword arguments**:
       
   130       - *fpath*: string, full or relative path to the mo file to parse
       
   131       - *wrapwidth*: integer, the wrap width, only useful when -w option was
       
   132         passed to xgettext to generate the po file that was used to format
       
   133         the mo file (optional, default to 78)
       
   134       - *autodetect_encoding*: boolean, if set to False the function will
       
   135         not try to detect the po file encoding (optional, default to True)
       
   136       - *encoding*: string, an encoding, only relevant if autodetect_encoding
       
   137         is set to False
       
   138       - *check_for_duplicates*: whether to check for duplicate entries when
       
   139         adding entries to the file, default: False (optional)
       
   140 
       
   141     **Example**:
       
   142 
       
   143     >>> import polib
       
   144     >>> mo = polib.mofile('tests/test_utf8.mo', check_for_duplicates=True)
       
   145     >>> mo #doctest: +ELLIPSIS
       
   146     <MOFile instance at ...>
       
   147     >>> import os, tempfile
       
   148     >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
       
   149     ...     orig_mo = polib.mofile('tests/'+fname)
       
   150     ...     tmpf = tempfile.NamedTemporaryFile().name
       
   151     ...     orig_mo.save(tmpf)
       
   152     ...     try:
       
   153     ...         new_mo = polib.mofile(tmpf)
       
   154     ...         for old, new in zip(orig_mo, new_mo):
       
   155     ...             if old.msgid != new.msgid:
       
   156     ...                 old.msgstr
       
   157     ...                 new.msgstr
       
   158     ...     finally:
       
   159     ...         os.unlink(tmpf)
       
   160     """
       
   161     if kwargs.get('autodetect_encoding', True) == True:
       
   162         enc = detect_encoding(fpath, True)
       
   163     else:
       
   164         enc = kwargs.get('encoding', default_encoding)
       
   165     parser = _MOFileParser(
       
   166         fpath,
       
   167         encoding=enc,
       
   168         check_for_duplicates=kwargs.get('check_for_duplicates', False)
       
   169     )
       
   170     instance = parser.parse()
       
   171     instance.wrapwidth = kwargs.get('wrapwidth', 78)
       
   172     return instance
       
   173 
       
   174 # }}}
       
   175 # function detect_encoding() {{{
       
   176 
       
   177 def detect_encoding(fpath, binary_mode=False):
       
   178     """
       
   179     Try to detect the encoding used by the file *fpath*. The function will
       
   180     return polib default *encoding* if it's unable to detect it.
       
   181 
       
   182     **Keyword argument**:
       
   183       - *fpath*: string, full or relative path to the mo file to parse.
       
   184 
       
   185     **Examples**:
       
   186 
       
   187     >>> print(detect_encoding('tests/test_noencoding.po'))
       
   188     utf-8
       
   189     >>> print(detect_encoding('tests/test_utf8.po'))
       
   190     UTF-8
       
   191     >>> print(detect_encoding('tests/test_utf8.mo', True))
       
   192     UTF-8
       
   193     >>> print(detect_encoding('tests/test_iso-8859-15.po'))
       
   194     ISO_8859-15
       
   195     >>> print(detect_encoding('tests/test_iso-8859-15.mo', True))
       
   196     ISO_8859-15
       
   197     """
       
   198     import re
       
   199     rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
       
   200     if binary_mode:
       
   201         mode = 'rb'
       
   202     else:
       
   203         mode = 'r'
       
   204     f = open(fpath, mode)
       
   205     for l in f.readlines():
       
   206         match = rx.search(l)
       
   207         if match:
       
   208             f.close()
       
   209             return match.group(1).strip()
       
   210     f.close()
       
   211     return default_encoding
       
   212 
       
   213 # }}}
       
   214 # function escape() {{{
       
   215 
       
   216 def escape(st):
       
   217     """
       
   218     Escape special chars and return the given string *st*.
       
   219 
       
   220     **Examples**:
       
   221 
       
   222     >>> escape('\\t and \\n and \\r and " and \\\\')
       
   223     '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
       
   224     """
       
   225     return st.replace('\\', r'\\')\
       
   226              .replace('\t', r'\t')\
       
   227              .replace('\r', r'\r')\
       
   228              .replace('\n', r'\n')\
       
   229              .replace('\"', r'\"')
       
   230 
       
   231 # }}}
       
   232 # function unescape() {{{
       
   233 
       
   234 def unescape(st):
       
   235     """
       
   236     Unescape special chars and return the given string *st*.
       
   237 
       
   238     **Examples**:
       
   239 
       
   240     >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
       
   241     '\\t and \\n and \\r and " and \\\\'
       
   242     >>> unescape(r'\\n')
       
   243     '\\n'
       
   244     >>> unescape(r'\\\\n')
       
   245     '\\\\n'
       
   246     """
       
   247     raw_strings = [
       
   248         (r'\\n', r'\n', '\n'),
       
   249         (r'\\r', r'\r', '\r'),
       
   250         (r'\\t', r'\t', '\t'),
       
   251     ]
       
   252     for a, b, c in raw_strings:
       
   253         if a in st:
       
   254             st = st.replace(a, b)
       
   255         else:
       
   256             st = st.replace(b, c)
       
   257     return st.replace(r'\"', '"').replace(r'\\', '\\')
       
   258 
       
   259 # }}}
       
   260 # class _BaseFile {{{
       
   261 
       
   262 class _BaseFile(list):
       
   263     """
       
   264     Common parent class for POFile and MOFile classes.
       
   265     This class must **not** be instanciated directly.
       
   266     """
       
   267 
       
   268     def __init__(self, *args, **kwargs):
       
   269         """
       
   270         Constructor.
       
   271 
       
   272         **Keyword arguments**:
       
   273           - *fpath*: string, path to po or mo file
       
   274           - *wrapwidth*: integer, the wrap width, only useful when -w option
       
   275             was passed to xgettext to generate the po file that was used to
       
   276             format the mo file, default to 78 (optional),
       
   277           - *encoding*: string, the encoding to use, defaults to
       
   278             "default_encoding" global variable (optional),
       
   279           - *check_for_duplicates*: whether to check for duplicate entries
       
   280             when adding entries to the file, default: False (optional).
       
   281         """
       
   282         list.__init__(self)
       
   283         # the opened file handle
       
   284         self.fpath = kwargs.get('fpath')
       
   285         # the width at which lines should be wrapped
       
   286         self.wrapwidth = kwargs.get('wrapwidth', 78)
       
   287         # the file encoding
       
   288         self.encoding = kwargs.get('encoding', default_encoding)
       
   289         # whether to check for duplicate entries or not
       
   290         self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
       
   291         # header
       
   292         self.header = ''
       
   293         # both po and mo files have metadata
       
   294         self.metadata = {}
       
   295         self.metadata_is_fuzzy = 0
       
   296 
       
   297     def __str__(self):
       
   298         """
       
   299         String representation of the file.
       
   300         """
       
   301         ret = []
       
   302         entries = [self.metadata_as_entry()] + \
       
   303                   [e for e in self if not e.obsolete]
       
   304         for entry in entries:
       
   305             ret.append(entry.__str__(self.wrapwidth))
       
   306         for entry in self.obsolete_entries():
       
   307             ret.append(entry.__str__(self.wrapwidth))
       
   308         return '\n'.join(ret)
       
   309 
       
   310     def __contains__(self, entry):
       
   311         """
       
   312         Overriden method to implement the membership test (in and not in).
       
   313         The method considers that an entry is in the file if it finds an 
       
   314         entry that has the same msgid (case sensitive).
       
   315 
       
   316         **Keyword argument**:
       
   317           - *entry*: an instance of polib._BaseEntry
       
   318 
       
   319         **Tests**:
       
   320         >>> po = POFile()
       
   321         >>> e1 = POEntry(msgid='foobar', msgstr='spam')
       
   322         >>> e2 = POEntry(msgid='barfoo', msgstr='spam')
       
   323         >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
       
   324         >>> e4 = POEntry(msgid='spameggs', msgstr='eggs')
       
   325         >>> po.append(e1)
       
   326         >>> po.append(e2)
       
   327         >>> e1 in po
       
   328         True
       
   329         >>> e2 not in po
       
   330         False
       
   331         >>> e3 in po
       
   332         True
       
   333         >>> e4 in po
       
   334         False
       
   335         """
       
   336         return self.find(entry.msgid, by='msgid') is not None
       
   337 
       
   338     def append(self, entry):
       
   339         """
       
   340         Overriden method to check for duplicates entries, if a user tries to
       
   341         add an entry that already exists, the method will raise a ValueError
       
   342         exception.
       
   343 
       
   344         **Keyword argument**:
       
   345           - *entry*: an instance of polib._BaseEntry
       
   346 
       
   347         **Tests**:
       
   348         >>> e1 = POEntry(msgid='foobar', msgstr='spam')
       
   349         >>> e2 = POEntry(msgid='foobar', msgstr='eggs')
       
   350         >>> po = POFile(check_for_duplicates=True)
       
   351         >>> po.append(e1)
       
   352         >>> try:
       
   353         ...     po.append(e2)
       
   354         ... except ValueError, e:
       
   355         ...     unicode(e)
       
   356         u'Entry "foobar" already exists'
       
   357         """
       
   358         if self.check_for_duplicates and entry in self:
       
   359             raise ValueError('Entry "%s" already exists' % entry.msgid)
       
   360         super(_BaseFile, self).append(entry)
       
   361 
       
   362     def insert(self, index, entry):
       
   363         """
       
   364         Overriden method to check for duplicates entries, if a user tries to
       
   365         insert an entry that already exists, the method will raise a ValueError
       
   366         exception.
       
   367 
       
   368         **Keyword arguments**:
       
   369           - *index*: index at which the entry should be inserted
       
   370           - *entry*: an instance of polib._BaseEntry
       
   371 
       
   372         **Tests**:
       
   373         >>> import polib
       
   374         >>> polib.check_for_duplicates = True
       
   375         >>> e1 = POEntry(msgid='foobar', msgstr='spam')
       
   376         >>> e2 = POEntry(msgid='barfoo', msgstr='eggs')
       
   377         >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
       
   378         >>> po = POFile(check_for_duplicates=True)
       
   379         >>> po.insert(0, e1)
       
   380         >>> po.insert(1, e2)
       
   381         >>> try:
       
   382         ...     po.insert(0, e3)
       
   383         ... except ValueError, e:
       
   384         ...     unicode(e)
       
   385         u'Entry "foobar" already exists'
       
   386         """
       
   387         if self.check_for_duplicates and entry in self:
       
   388             raise ValueError('Entry "%s" already exists' % entry.msgid)
       
   389         super(_BaseFile, self).insert(index, entry)
       
   390 
       
   391     def __repr__(self):
       
   392         """Return the official string representation of the object."""
       
   393         return '<%s instance at %x>' % (self.__class__.__name__, id(self))
       
   394 
       
   395     def metadata_as_entry(self):
       
   396         """
       
   397         Return the metadata as an entry:
       
   398 
       
   399         >>> import polib
       
   400         >>> po = polib.pofile('tests/test_fuzzy_header.po')
       
   401         >>> unicode(po) == unicode(open('tests/test_fuzzy_header.po').read())
       
   402         True
       
   403         """
       
   404         e = POEntry(msgid='')
       
   405         mdata = self.ordered_metadata()
       
   406         if mdata:
       
   407             strs = []
       
   408             e._multiline_str['msgstr'] = ''
       
   409             for name, value in mdata:
       
   410                 # Strip whitespace off each line in a multi-line entry
       
   411                 strs.append('%s: %s' % (name, value))
       
   412             e.msgstr = '\n'.join(strs) + '\n'
       
   413             e._multiline_str['msgstr'] = '__POLIB__NL__'.join(
       
   414                     [s + '\n' for s in strs])
       
   415         if self.metadata_is_fuzzy:
       
   416             e.flags.append('fuzzy')
       
   417         return e
       
   418 
       
   419     def save(self, fpath=None, repr_method='__str__'):
       
   420         """
       
   421         Save the po file to file *fpath* if no file handle exists for
       
   422         the object. If there's already an open file and no fpath is
       
   423         provided, then the existing file is rewritten with the modified
       
   424         data.
       
   425 
       
   426         **Keyword arguments**:
       
   427           - *fpath*: string, full or relative path to the file.
       
   428           - *repr_method*: string, the method to use for output.
       
   429         """
       
   430         if self.fpath is None and fpath is None:
       
   431             raise IOError('You must provide a file path to save() method')
       
   432         contents = getattr(self, repr_method)()
       
   433         if fpath is None:
       
   434             fpath = self.fpath
       
   435         if repr_method == 'to_binary':
       
   436             fhandle = open(fpath, 'wb')
       
   437         else:
       
   438             fhandle = codecs.open(fpath, 'w', self.encoding)
       
   439             if type(contents) != types.UnicodeType:
       
   440                 contents = contents.decode(self.encoding)
       
   441         fhandle.write(contents)
       
   442         fhandle.close()
       
   443 
       
   444     def find(self, st, by='msgid'):
       
   445         """
       
   446         Find entry which msgid (or property identified by the *by*
       
   447         attribute) matches the string *st*.
       
   448 
       
   449         **Keyword arguments**:
       
   450           - *st*: string, the string to search for
       
   451           - *by*: string, the comparison attribute
       
   452 
       
   453         **Examples**:
       
   454 
       
   455         >>> po = pofile('tests/test_utf8.po')
       
   456         >>> entry = po.find('Thursday')
       
   457         >>> entry.msgstr
       
   458         u'Jueves'
       
   459         >>> entry = po.find('Some unexistant msgid')
       
   460         >>> entry is None
       
   461         True
       
   462         >>> entry = po.find('Jueves', 'msgstr')
       
   463         >>> entry.msgid
       
   464         u'Thursday'
       
   465         """
       
   466         for e in self:
       
   467             if getattr(e, by) == st:
       
   468                 return e
       
   469         return None
       
   470 
       
   471     def ordered_metadata(self):
       
   472         """
       
   473         Convenience method that return the metadata ordered. The return
       
   474         value is list of tuples (metadata name, metadata_value).
       
   475         """
       
   476         # copy the dict first
       
   477         metadata = self.metadata.copy()
       
   478         data_order = [
       
   479             'Project-Id-Version',
       
   480             'Report-Msgid-Bugs-To',
       
   481             'POT-Creation-Date',
       
   482             'PO-Revision-Date',
       
   483             'Last-Translator',
       
   484             'Language-Team',
       
   485             'MIME-Version',
       
   486             'Content-Type',
       
   487             'Content-Transfer-Encoding'
       
   488         ]
       
   489         ordered_data = []
       
   490         for data in data_order:
       
   491             try:
       
   492                 value = metadata.pop(data)
       
   493                 ordered_data.append((data, value))
       
   494             except KeyError:
       
   495                 pass
       
   496         # the rest of the metadata won't be ordered there are no specs for this
       
   497         keys = metadata.keys()
       
   498         list(keys).sort()
       
   499         for data in keys:
       
   500             value = metadata[data]
       
   501             ordered_data.append((data, value))
       
   502         return ordered_data
       
   503 
       
   504     def to_binary(self):
       
   505         """
       
   506         Return the mofile binary representation.
       
   507         """
       
   508         import array
       
   509         import struct
       
   510         import types
       
   511         offsets = []
       
   512         entries = self.translated_entries()
       
   513         # the keys are sorted in the .mo file
       
   514         def cmp(_self, other):
       
   515             if _self.msgid > other.msgid:
       
   516                 return 1
       
   517             elif _self.msgid < other.msgid:
       
   518                 return -1
       
   519             else:
       
   520                 return 0
       
   521         # add metadata entry
       
   522         entries.sort(cmp)
       
   523         mentry = self.metadata_as_entry()
       
   524         mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
       
   525         entries = [mentry] + entries
       
   526         entries_len = len(entries)
       
   527         ids, strs = '', ''
       
   528         for e in entries:
       
   529             # For each string, we need size and file offset.  Each string is
       
   530             # NUL terminated; the NUL does not count into the size.
       
   531             if e.msgid_plural:
       
   532                 indexes = e.msgstr_plural.keys()
       
   533                 indexes.sort()
       
   534                 msgstr = []
       
   535                 for index in indexes:
       
   536                     msgstr.append(e.msgstr_plural[index])
       
   537                 msgid = self._encode(e.msgid + '\0' + e.msgid_plural)
       
   538                 msgstr = self._encode('\0'.join(msgstr))
       
   539             else:
       
   540                 msgid = self._encode(e.msgid)
       
   541                 msgstr = self._encode(e.msgstr)
       
   542             offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
       
   543             ids  += msgid  + '\0'
       
   544             strs += msgstr + '\0'
       
   545         # The header is 7 32-bit unsigned integers.
       
   546         keystart = 7*4+16*entries_len
       
   547         # and the values start after the keys
       
   548         valuestart = keystart + len(ids)
       
   549         koffsets = []
       
   550         voffsets = []
       
   551         # The string table first has the list of keys, then the list of values.
       
   552         # Each entry has first the size of the string, then the file offset.
       
   553         for o1, l1, o2, l2 in offsets:
       
   554             koffsets += [l1, o1+keystart]
       
   555             voffsets += [l2, o2+valuestart]
       
   556         offsets = koffsets + voffsets
       
   557         output  = struct.pack("IIIIIII",
       
   558                              0x950412de,        # Magic number
       
   559                              0,                 # Version
       
   560                              entries_len,       # # of entries
       
   561                              7*4,               # start of key index
       
   562                              7*4+entries_len*8, # start of value index
       
   563                              0, 0)              # size and offset of hash table
       
   564         output += array.array("I", offsets).tostring()
       
   565         output += ids
       
   566         output += strs
       
   567         return output
       
   568 
       
   569     def _encode(self, mixed):
       
   570         """
       
   571         Encode the given argument with the file encoding if the type is unicode
       
   572         and return the encoded string.
       
   573         """
       
   574         if type(mixed) == types.UnicodeType:
       
   575             return mixed.encode(self.encoding)
       
   576         return mixed
       
   577 
       
   578 # }}}
       
   579 # class POFile {{{
       
   580 
       
   581 class POFile(_BaseFile):
       
   582     '''
       
   583     Po (or Pot) file reader/writer.
       
   584     POFile objects inherit the list objects methods.
       
   585 
       
   586     **Example**:
       
   587 
       
   588     >>> po = POFile()
       
   589     >>> entry1 = POEntry(
       
   590     ...     msgid="Some english text",
       
   591     ...     msgstr="Un texte en anglais"
       
   592     ... )
       
   593     >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
       
   594     >>> entry1.comment = "Some useful comment"
       
   595     >>> entry2 = POEntry(
       
   596     ...     msgid="Peace in some languages",
       
   597     ...     msgstr="Pace سلام שלום Hasîtî 和平"
       
   598     ... )
       
   599     >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
       
   600     >>> entry2.comment = "Another useful comment"
       
   601     >>> entry3 = POEntry(
       
   602     ...     msgid='Some entry with quotes " \\"',
       
   603     ...     msgstr='Un message unicode avec des quotes " \\"'
       
   604     ... )
       
   605     >>> entry3.comment = "Test string quoting"
       
   606     >>> po.append(entry1)
       
   607     >>> po.append(entry2)
       
   608     >>> po.append(entry3)
       
   609     >>> po.header = "Some Header"
       
   610     >>> print(po)
       
   611     # Some Header
       
   612     msgid ""
       
   613     msgstr ""
       
   614     <BLANKLINE>
       
   615     #. Some useful comment
       
   616     #: testfile:12 another_file:1
       
   617     msgid "Some english text"
       
   618     msgstr "Un texte en anglais"
       
   619     <BLANKLINE>
       
   620     #. Another useful comment
       
   621     #: testfile:15 another_file:5
       
   622     msgid "Peace in some languages"
       
   623     msgstr "Pace سلام שלום Hasîtî 和平"
       
   624     <BLANKLINE>
       
   625     #. Test string quoting
       
   626     msgid "Some entry with quotes \\" \\""
       
   627     msgstr "Un message unicode avec des quotes \\" \\""
       
   628     <BLANKLINE>
       
   629     '''
       
   630 
       
   631     def __str__(self):
       
   632         """Return the string representation of the po file"""
       
   633         ret, headers = '', self.header.split('\n')
       
   634         for header in headers:
       
   635             if header[:1] in [',', ':']:
       
   636                 ret += '#%s\n' % header
       
   637             else:
       
   638                 ret += '# %s\n' % header
       
   639         return ret + _BaseFile.__str__(self)
       
   640 
       
   641     def save_as_mofile(self, fpath):
       
   642         """
       
   643         Save the binary representation of the file to *fpath*.
       
   644 
       
   645         **Keyword arguments**:
       
   646           - *fpath*: string, full or relative path to the file.
       
   647         """
       
   648         _BaseFile.save(self, fpath, 'to_binary')
       
   649 
       
   650     def percent_translated(self):
       
   651         """
       
   652         Convenience method that return the percentage of translated
       
   653         messages.
       
   654 
       
   655         **Example**:
       
   656 
       
   657         >>> import polib
       
   658         >>> po = polib.pofile('tests/test_pofile_helpers.po')
       
   659         >>> po.percent_translated()
       
   660         50
       
   661         >>> po = POFile()
       
   662         >>> po.percent_translated()
       
   663         100
       
   664         """
       
   665         total = len([e for e in self if not e.obsolete])
       
   666         if total == 0:
       
   667             return 100
       
   668         translated = len(self.translated_entries())
       
   669         return int((100.00 / float(total)) * translated)
       
   670 
       
   671     def translated_entries(self):
       
   672         """
       
   673         Convenience method that return a list of translated entries.
       
   674 
       
   675         **Example**:
       
   676 
       
   677         >>> import polib
       
   678         >>> po = polib.pofile('tests/test_pofile_helpers.po')
       
   679         >>> len(po.translated_entries())
       
   680         6
       
   681         """
       
   682         return [e for e in self if e.translated()]
       
   683 
       
   684     def untranslated_entries(self):
       
   685         """
       
   686         Convenience method that return a list of untranslated entries.
       
   687 
       
   688         **Example**:
       
   689 
       
   690         >>> import polib
       
   691         >>> po = polib.pofile('tests/test_pofile_helpers.po')
       
   692         >>> len(po.untranslated_entries())
       
   693         4
       
   694         """
       
   695         return [e for e in self if not e.translated() and not e.obsolete \
       
   696                 and not 'fuzzy' in e.flags]
       
   697 
       
   698     def fuzzy_entries(self):
       
   699         """
       
   700         Convenience method that return the list of 'fuzzy' entries.
       
   701 
       
   702         **Example**:
       
   703 
       
   704         >>> import polib
       
   705         >>> po = polib.pofile('tests/test_pofile_helpers.po')
       
   706         >>> len(po.fuzzy_entries())
       
   707         2
       
   708         """
       
   709         return [e for e in self if 'fuzzy' in e.flags]
       
   710 
       
   711     def obsolete_entries(self):
       
   712         """
       
   713         Convenience method that return the list of obsolete entries.
       
   714 
       
   715         **Example**:
       
   716 
       
   717         >>> import polib
       
   718         >>> po = polib.pofile('tests/test_pofile_helpers.po')
       
   719         >>> len(po.obsolete_entries())
       
   720         4
       
   721         """
       
   722         return [e for e in self if e.obsolete]
       
   723 
       
   724     def merge(self, refpot):
       
   725         """
       
   726         XXX this could not work if encodings are different, needs thinking
       
   727         and general refactoring of how polib handles encoding...
       
   728 
       
   729         Convenience method that merge the current pofile with the pot file
       
   730         provided. It behaves exactly as the gettext msgmerge utility:
       
   731 
       
   732           - comments of this file will be preserved, but extracted comments
       
   733             and occurrences will be discarded
       
   734           - any translations or comments in the file will be discarded,
       
   735             however dot comments and file positions will be preserved
       
   736 
       
   737         **Keyword argument**:
       
   738           - *refpot*: object POFile, the reference catalog.
       
   739 
       
   740         **Example**:
       
   741 
       
   742         >>> import polib
       
   743         >>> refpot = polib.pofile('tests/test_merge.pot')
       
   744         >>> po = polib.pofile('tests/test_merge_before.po')
       
   745         >>> po.merge(refpot)
       
   746         >>> expected_po = polib.pofile('tests/test_merge_after.po')
       
   747         >>> unicode(po) == unicode(expected_po)
       
   748         True
       
   749         """
       
   750         for entry in refpot:
       
   751             e = self.find(entry.msgid)
       
   752             if e is None:
       
   753                 e = POEntry()
       
   754                 self.append(e)
       
   755             e.merge(entry)
       
   756         # ok, now we must "obsolete" entries that are not in the refpot
       
   757         # anymore
       
   758         for entry in self:
       
   759             if refpot.find(entry.msgid) is None:
       
   760                 entry.obsolete = True
       
   761 
       
   762 # }}}
       
   763 # class MOFile {{{
       
   764 
       
   765 class MOFile(_BaseFile):
       
   766     '''
       
   767     Mo file reader/writer.
       
   768     MOFile objects inherit the list objects methods.
       
   769 
       
   770     **Example**:
       
   771 
       
   772     >>> mo = MOFile()
       
   773     >>> entry1 = POEntry(
       
   774     ...     msgid="Some english text",
       
   775     ...     msgstr="Un texte en anglais"
       
   776     ... )
       
   777     >>> entry2 = POEntry(
       
   778     ...     msgid="I need my dirty cheese",
       
   779     ...     msgstr="Je veux mon sale fromage"
       
   780     ... )
       
   781     >>> entry3 = MOEntry(
       
   782     ...     msgid='Some entry with quotes " \\"',
       
   783     ...     msgstr='Un message unicode avec des quotes " \\"'
       
   784     ... )
       
   785     >>> mo.append(entry1)
       
   786     >>> mo.append(entry2)
       
   787     >>> mo.append(entry3)
       
   788     >>> print(mo)
       
   789     msgid ""
       
   790     msgstr ""
       
   791     <BLANKLINE>
       
   792     msgid "Some english text"
       
   793     msgstr "Un texte en anglais"
       
   794     <BLANKLINE>
       
   795     msgid "I need my dirty cheese"
       
   796     msgstr "Je veux mon sale fromage"
       
   797     <BLANKLINE>
       
   798     msgid "Some entry with quotes \\" \\""
       
   799     msgstr "Un message unicode avec des quotes \\" \\""
       
   800     <BLANKLINE>
       
   801     '''
       
   802 
       
   803     def __init__(self, *args, **kwargs):
       
   804         """
       
   805         MOFile constructor. Mo files have two other properties:
       
   806             - magic_number: the magic_number of the binary file,
       
   807             - version: the version of the mo spec.
       
   808         """
       
   809         _BaseFile.__init__(self, *args, **kwargs)
       
   810         self.magic_number = None
       
   811         self.version = 0
       
   812 
       
   813     def save_as_pofile(self, fpath):
       
   814         """
       
   815         Save the string representation of the file to *fpath*.
       
   816 
       
   817         **Keyword argument**:
       
   818           - *fpath*: string, full or relative path to the file.
       
   819         """
       
   820         _BaseFile.save(self, fpath)
       
   821 
       
   822     def save(self, fpath):
       
   823         """
       
   824         Save the binary representation of the file to *fpath*.
       
   825 
       
   826         **Keyword argument**:
       
   827           - *fpath*: string, full or relative path to the file.
       
   828         """
       
   829         _BaseFile.save(self, fpath, 'to_binary')
       
   830 
       
   831     def percent_translated(self):
       
   832         """
       
   833         Convenience method to keep the same interface with POFile instances.
       
   834         """
       
   835         return 100
       
   836 
       
   837     def translated_entries(self):
       
   838         """
       
   839         Convenience method to keep the same interface with POFile instances.
       
   840         """
       
   841         return self
       
   842 
       
   843     def untranslated_entries(self):
       
   844         """
       
   845         Convenience method to keep the same interface with POFile instances.
       
   846         """
       
   847         return []
       
   848 
       
   849     def fuzzy_entries(self):
       
   850         """
       
   851         Convenience method to keep the same interface with POFile instances.
       
   852         """
       
   853         return []
       
   854 
       
   855     def obsolete_entries(self):
       
   856         """
       
   857         Convenience method to keep the same interface with POFile instances.
       
   858         """
       
   859         return []
       
   860 
       
   861 # }}}
       
   862 # class _BaseEntry {{{
       
   863 
       
   864 class _BaseEntry(object):
       
   865     """
       
   866     Base class for POEntry or MOEntry objects.
       
   867     This class must *not* be instanciated directly.
       
   868     """
       
   869 
       
   870     def __init__(self, *args, **kwargs):
       
   871         """Base Entry constructor."""
       
   872         self.msgid = kwargs.get('msgid', '')
       
   873         self.msgstr = kwargs.get('msgstr', '')
       
   874         self.msgid_plural = kwargs.get('msgid_plural', '')
       
   875         self.msgstr_plural = kwargs.get('msgstr_plural', {})
       
   876         self.obsolete = kwargs.get('obsolete', False)
       
   877         self.encoding = kwargs.get('encoding', default_encoding)
       
   878         self.msgctxt = kwargs.get('msgctxt', None)
       
   879         self._multiline_str = {}
       
   880 
       
   881     def __repr__(self):
       
   882         """Return the official string representation of the object."""
       
   883         return '<%s instance at %x>' % (self.__class__.__name__, id(self))
       
   884 
       
   885     def __str__(self, wrapwidth=78):
       
   886         """
       
   887         Common string representation of the POEntry and MOEntry
       
   888         objects.
       
   889         """
       
   890         if self.obsolete:
       
   891             delflag = '#~ '
       
   892         else:
       
   893             delflag = ''
       
   894         ret = []
       
   895         # write the msgctxt if any
       
   896         if self.msgctxt is not None:
       
   897             ret += self._str_field("msgctxt", delflag, "", self.msgctxt)
       
   898         # write the msgid
       
   899         ret += self._str_field("msgid", delflag, "", self.msgid)
       
   900         # write the msgid_plural if any
       
   901         if self.msgid_plural:
       
   902             ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural)
       
   903         if self.msgstr_plural:
       
   904             # write the msgstr_plural if any
       
   905             msgstrs = self.msgstr_plural
       
   906             keys = list(msgstrs)
       
   907             keys.sort()
       
   908             for index in keys:
       
   909                 msgstr = msgstrs[index]
       
   910                 plural_index = '[%s]' % index
       
   911                 ret += self._str_field("msgstr", delflag, plural_index, msgstr)
       
   912         else:
       
   913             # otherwise write the msgstr
       
   914             ret += self._str_field("msgstr", delflag, "", self.msgstr)
       
   915         ret.append('')
       
   916         return '\n'.join(ret)
       
   917 
       
   918     def _str_field(self, fieldname, delflag, plural_index, field):
       
   919         if (fieldname + plural_index) in self._multiline_str:
       
   920             field = self._multiline_str[fieldname + plural_index]
       
   921             lines = [''] + field.split('__POLIB__NL__')
       
   922         else:
       
   923             lines = field.splitlines(True)
       
   924             if len(lines) > 1:
       
   925                 lines = ['']+lines # start with initial empty line
       
   926             else:
       
   927                 lines = [field] # needed for the empty string case
       
   928         if fieldname.startswith('previous_'):
       
   929             # quick and dirty trick to get the real field name
       
   930             fieldname = fieldname[9:]
       
   931 
       
   932         ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
       
   933                                 escape(lines.pop(0)))]
       
   934         for mstr in lines:
       
   935             ret.append('%s"%s"' % (delflag, escape(mstr)))
       
   936         return ret
       
   937 
       
   938 # }}}
       
   939 # class POEntry {{{
       
   940 
       
   941 class POEntry(_BaseEntry):
       
   942     """
       
   943     Represents a po file entry.
       
   944 
       
   945     **Examples**:
       
   946 
       
   947     >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
       
   948     >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
       
   949     >>> print(entry)
       
   950     #: welcome.py:12 anotherfile.py:34
       
   951     msgid "Welcome"
       
   952     msgstr "Bienvenue"
       
   953     <BLANKLINE>
       
   954     >>> entry = POEntry()
       
   955     >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
       
   956     >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
       
   957     >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
       
   958     >>> entry.flags.append('c-format')
       
   959     >>> entry.previous_msgctxt = '@somecontext'
       
   960     >>> entry.previous_msgid = 'I had eggs but no spam !'
       
   961     >>> entry.previous_msgid_plural = 'I had eggs and %d spam !'
       
   962     >>> entry.msgctxt = '@somenewcontext'
       
   963     >>> entry.msgid = 'I have spam but no egg !'
       
   964     >>> entry.msgid_plural = 'I have spam and %d eggs !'
       
   965     >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
       
   966     >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
       
   967     >>> print(entry)
       
   968     #. A plural translation. This is a very very very long line please do not
       
   969     #. wrap, this is just for testing comment wrapping...
       
   970     # A plural translation. This is a very very very long line please do not wrap,
       
   971     # this is just for testing comment wrapping...
       
   972     #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
       
   973     #: src/eggs.c:45
       
   974     #, c-format
       
   975     #| msgctxt "@somecontext"
       
   976     #| msgid "I had eggs but no spam !"
       
   977     #| msgid_plural "I had eggs and %d spam !"
       
   978     msgctxt "@somenewcontext"
       
   979     msgid "I have spam but no egg !"
       
   980     msgid_plural "I have spam and %d eggs !"
       
   981     msgstr[0] "J'ai du jambon mais aucun oeuf !"
       
   982     msgstr[1] "J'ai du jambon et %d oeufs !"
       
   983     <BLANKLINE>
       
   984     """
       
   985 
       
   986     def __init__(self, *args, **kwargs):
       
   987         """POEntry constructor."""
       
   988         _BaseEntry.__init__(self, *args, **kwargs)
       
   989         self.comment = kwargs.get('comment', '')
       
   990         self.tcomment = kwargs.get('tcomment', '')
       
   991         self.occurrences = kwargs.get('occurrences', [])
       
   992         self.flags = kwargs.get('flags', [])
       
   993         self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
       
   994         self.previous_msgid = kwargs.get('previous_msgid', None)
       
   995         self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
       
   996 
       
   997     def __str__(self, wrapwidth=78):
       
   998         """
       
   999         Return the string representation of the entry.
       
  1000         """
       
  1001         if self.obsolete:
       
  1002             return _BaseEntry.__str__(self)
       
  1003         ret = []
       
  1004         # comment first, if any (with text wrapping as xgettext does)
       
  1005         if self.comment != '':
       
  1006             for comment in self.comment.split('\n'):
       
  1007                 if wrapwidth > 0 and len(comment) > wrapwidth-3:
       
  1008                     ret += textwrap.wrap(comment, wrapwidth,
       
  1009                                          initial_indent='#. ',
       
  1010                                          subsequent_indent='#. ',
       
  1011                                          break_long_words=False)
       
  1012                 else:
       
  1013                     ret.append('#. %s' % comment)
       
  1014         # translator comment, if any (with text wrapping as xgettext does)
       
  1015         if self.tcomment != '':
       
  1016             for tcomment in self.tcomment.split('\n'):
       
  1017                 if wrapwidth > 0 and len(tcomment) > wrapwidth-2:
       
  1018                     ret += textwrap.wrap(tcomment, wrapwidth,
       
  1019                                          initial_indent='# ',
       
  1020                                          subsequent_indent='# ',
       
  1021                                          break_long_words=False)
       
  1022                 else:
       
  1023                     ret.append('# %s' % tcomment)
       
  1024         # occurrences (with text wrapping as xgettext does)
       
  1025         if self.occurrences:
       
  1026             filelist = []
       
  1027             for fpath, lineno in self.occurrences:
       
  1028                 if lineno:
       
  1029                     filelist.append('%s:%s' % (fpath, lineno))
       
  1030                 else:
       
  1031                     filelist.append(fpath)
       
  1032             filestr = ' '.join(filelist)
       
  1033             if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
       
  1034                 # XXX textwrap split words that contain hyphen, this is not 
       
  1035                 # what we want for filenames, so the dirty hack is to 
       
  1036                 # temporally replace hyphens with a char that a file cannot 
       
  1037                 # contain, like "*"
       
  1038                 lines = textwrap.wrap(filestr.replace('-', '*'),
       
  1039                                       wrapwidth,
       
  1040                                       initial_indent='#: ',
       
  1041                                       subsequent_indent='#: ',
       
  1042                                       break_long_words=False)
       
  1043                 # end of the replace hack
       
  1044                 for line in lines:
       
  1045                     ret.append(line.replace('*', '-'))
       
  1046             else:
       
  1047                 ret.append('#: '+filestr)
       
  1048         # flags
       
  1049         if self.flags:
       
  1050             flags = []
       
  1051             for flag in self.flags:
       
  1052                 flags.append(flag)
       
  1053             ret.append('#, %s' % ', '.join(flags))
       
  1054 
       
  1055         # previous context and previous msgid/msgid_plural
       
  1056         if self.previous_msgctxt:
       
  1057             ret += self._str_field("previous_msgctxt", "#| ", "",
       
  1058                                    self.previous_msgctxt)
       
  1059         if self.previous_msgid:
       
  1060             ret += self._str_field("previous_msgid", "#| ", "", 
       
  1061                                    self.previous_msgid)
       
  1062         if self.previous_msgid_plural:
       
  1063             ret += self._str_field("previous_msgid_plural", "#| ", "", 
       
  1064                                    self.previous_msgid_plural)
       
  1065 
       
  1066         ret.append(_BaseEntry.__str__(self))
       
  1067         return '\n'.join(ret)
       
  1068 
       
  1069     def __cmp__(self, other):
       
  1070         '''
       
  1071         Called by comparison operations if rich comparison is not defined.
       
  1072 
       
  1073         **Tests**:
       
  1074         >>> a  = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
       
  1075         >>> b  = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
       
  1076         >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
       
  1077         >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
       
  1078         >>> po = POFile()
       
  1079         >>> po.append(a)
       
  1080         >>> po.append(b)
       
  1081         >>> po.append(c1)
       
  1082         >>> po.append(c2)
       
  1083         >>> po.sort()
       
  1084         >>> print(po)
       
  1085         # 
       
  1086         msgid ""
       
  1087         msgstr ""
       
  1088         <BLANKLINE>
       
  1089         #: a.py:1 a.py:3
       
  1090         msgid "c2"
       
  1091         msgstr ""
       
  1092         <BLANKLINE>
       
  1093         #: a.py:1 b.py:1
       
  1094         msgid "c1"
       
  1095         msgstr ""
       
  1096         <BLANKLINE>
       
  1097         #: b.py:1 b.py:3
       
  1098         msgid "a"
       
  1099         msgstr ""
       
  1100         <BLANKLINE>
       
  1101         #: b.py:1 b.py:3
       
  1102         msgid "b"
       
  1103         msgstr ""
       
  1104         <BLANKLINE>
       
  1105         '''
       
  1106         def compare_occurrences(a, b):
       
  1107             """
       
  1108             Compare an entry occurrence with another one.
       
  1109             """
       
  1110             if a[0] != b[0]:
       
  1111                 return a[0] < b[0]
       
  1112             if a[1] != b[1]:
       
  1113                 return a[1] < b[1]
       
  1114             return 0
       
  1115 
       
  1116         # First: Obsolete test
       
  1117         if self.obsolete != other.obsolete:
       
  1118             if self.obsolete:
       
  1119                 return -1
       
  1120             else:
       
  1121                 return 1
       
  1122         # Work on a copy to protect original
       
  1123         occ1 = self.occurrences[:]
       
  1124         occ2 = other.occurrences[:]
       
  1125         # Sorting using compare method
       
  1126         occ1.sort(compare_occurrences)
       
  1127         occ2.sort(compare_occurrences)
       
  1128         # Comparing sorted occurrences
       
  1129         pos = 0
       
  1130         for entry1 in occ1:
       
  1131             try:
       
  1132                 entry2 = occ2[pos]
       
  1133             except IndexError:
       
  1134                 return 1
       
  1135             pos = pos + 1
       
  1136             if entry1[0] != entry2[0]:
       
  1137                 if entry1[0] > entry2[0]:
       
  1138                     return 1
       
  1139                 else:
       
  1140                     return -1
       
  1141             if entry1[1] != entry2[1]:
       
  1142                 if entry1[1] > entry2[1]:
       
  1143                     return 1
       
  1144                 else:
       
  1145                     return -1
       
  1146         # Finally: Compare message ID
       
  1147         if self.msgid > other.msgid: return 1
       
  1148         else: return -1
       
  1149 
       
  1150     def translated(self):
       
  1151         """
       
  1152         Return True if the entry has been translated or False.
       
  1153         """
       
  1154         if self.obsolete or 'fuzzy' in self.flags:
       
  1155             return False
       
  1156         if self.msgstr != '':
       
  1157             return True
       
  1158         if self.msgstr_plural:
       
  1159             for pos in self.msgstr_plural:
       
  1160                 if self.msgstr_plural[pos] == '':
       
  1161                     return False
       
  1162             return True
       
  1163         return False
       
  1164 
       
  1165     def merge(self, other):
       
  1166         """
       
  1167         Merge the current entry with the given pot entry.
       
  1168         """
       
  1169         self.msgid        = other.msgid
       
  1170         self.occurrences  = other.occurrences
       
  1171         self.comment      = other.comment
       
  1172         self.flags        = other.flags
       
  1173         self.msgid_plural = other.msgid_plural
       
  1174         if other.msgstr_plural:
       
  1175             for pos in other.msgstr_plural:
       
  1176                 try:
       
  1177                     # keep existing translation at pos if any
       
  1178                     self.msgstr_plural[pos]
       
  1179                 except KeyError:
       
  1180                     self.msgstr_plural[pos] = ''
       
  1181 
       
  1182 # }}}
       
  1183 # class MOEntry {{{
       
  1184 
       
  1185 class MOEntry(_BaseEntry):
       
  1186     """
       
  1187     Represents a mo file entry.
       
  1188 
       
  1189     **Examples**:
       
  1190 
       
  1191     >>> entry = MOEntry()
       
  1192     >>> entry.msgid  = 'translate me !'
       
  1193     >>> entry.msgstr = 'traduisez moi !'
       
  1194     >>> print(entry)
       
  1195     msgid "translate me !"
       
  1196     msgstr "traduisez moi !"
       
  1197     <BLANKLINE>
       
  1198     """
       
  1199 
       
  1200     def __str__(self, wrapwidth=78):
       
  1201         """
       
  1202         Return the string representation of the entry.
       
  1203         """
       
  1204         return _BaseEntry.__str__(self, wrapwidth)
       
  1205 
       
  1206 # }}}
       
  1207 # class _POFileParser {{{
       
  1208 
       
  1209 class _POFileParser(object):
       
  1210     """
       
  1211     A finite state machine to parse efficiently and correctly po
       
  1212     file format.
       
  1213     """
       
  1214 
       
  1215     def __init__(self, fpath, *args, **kwargs):
       
  1216         """
       
  1217         Constructor.
       
  1218 
       
  1219         **Arguments**:
       
  1220           - *fpath*: string, path to the po file
       
  1221           - *encoding*: string, the encoding to use, defaults to
       
  1222             "default_encoding" global variable (optional),
       
  1223           - *check_for_duplicates*: whether to check for duplicate entries
       
  1224             when adding entries to the file, default: False (optional).
       
  1225         """
       
  1226         enc = kwargs.get('encoding', default_encoding)
       
  1227         check_dup = kwargs.get('check_for_duplicates', False)
       
  1228         try:
       
  1229             self.fhandle = codecs.open(fpath, 'rU', enc)
       
  1230         except LookupError:
       
  1231             enc = default_encoding
       
  1232             self.fhandle = codecs.open(fpath, 'rU', enc)
       
  1233         self.instance = POFile(
       
  1234             fpath=fpath,
       
  1235             encoding=enc,
       
  1236             check_for_duplicates=check_dup
       
  1237         )
       
  1238         self.transitions = {}
       
  1239         self.current_entry = POEntry()
       
  1240         self.current_state = 'ST'
       
  1241         self.current_token = None
       
  1242         # two memo flags used in handlers
       
  1243         self.msgstr_index = 0
       
  1244         self.entry_obsolete = 0
       
  1245         # Configure the state machine, by adding transitions.
       
  1246         # Signification of symbols:
       
  1247         #     * ST: Beginning of the file (start)
       
  1248         #     * HE: Header
       
  1249         #     * TC: a translation comment
       
  1250         #     * GC: a generated comment
       
  1251         #     * OC: a file/line occurence
       
  1252         #     * FL: a flags line
       
  1253         #     * CT: a message context
       
  1254         #     * PC: a previous msgctxt
       
  1255         #     * PM: a previous msgid
       
  1256         #     * PP: a previous msgid_plural
       
  1257         #     * MI: a msgid
       
  1258         #     * MP: a msgid plural
       
  1259         #     * MS: a msgstr
       
  1260         #     * MX: a msgstr plural
       
  1261         #     * MC: a msgid or msgstr continuation line
       
  1262         all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
       
  1263                'MS', 'MP', 'MX', 'MI']
       
  1264 
       
  1265         self.add('TC', ['ST', 'HE'],                                     'HE')
       
  1266         self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
       
  1267                         'MP', 'MX', 'MI'],                               'TC')
       
  1268         self.add('GC', all,                                              'GC')
       
  1269         self.add('OC', all,                                              'OC')
       
  1270         self.add('FL', all,                                              'FL')
       
  1271         self.add('PC', all,                                              'PC')
       
  1272         self.add('PM', all,                                              'PM')
       
  1273         self.add('PP', all,                                              'PP')
       
  1274         self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
       
  1275                         'PP', 'MS', 'MX'],                               'CT')
       
  1276         self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC', 
       
  1277                  'PM', 'PP', 'MS', 'MX'],                                'MI')
       
  1278         self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'],             'MP')
       
  1279         self.add('MS', ['MI', 'MP', 'TC'],                               'MS')
       
  1280         self.add('MX', ['MI', 'MX', 'MP', 'TC'],                         'MX')
       
  1281         self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
       
  1282 
       
  1283     def parse(self):
       
  1284         """
       
  1285         Run the state machine, parse the file line by line and call process()
       
  1286         with the current matched symbol.
       
  1287         """
       
  1288         i, lastlen = 1, 0
       
  1289         for line in self.fhandle:
       
  1290             line = line.strip()
       
  1291             if line == '':
       
  1292                 i = i+1
       
  1293                 continue
       
  1294             if line[:3] == '#~ ':
       
  1295                 line = line[3:]
       
  1296                 self.entry_obsolete = 1
       
  1297             else:
       
  1298                 self.entry_obsolete = 0
       
  1299             self.current_token = line
       
  1300             if line[:2] == '#:':
       
  1301                 # we are on a occurrences line
       
  1302                 self.process('OC', i)
       
  1303             elif line[:9] == 'msgctxt "':
       
  1304                 # we are on a msgctxt
       
  1305                 self.process('CT', i)
       
  1306             elif line[:7] == 'msgid "':
       
  1307                 # we are on a msgid
       
  1308                 self.process('MI', i)
       
  1309             elif line[:8] == 'msgstr "':
       
  1310                 # we are on a msgstr
       
  1311                 self.process('MS', i)
       
  1312             elif line[:1] == '"' or line[:4] == '#| "':
       
  1313                 # we are on a continuation line or some metadata
       
  1314                 self.process('MC', i)
       
  1315             elif line[:14] == 'msgid_plural "':
       
  1316                 # we are on a msgid plural
       
  1317                 self.process('MP', i)
       
  1318             elif line[:7] == 'msgstr[':
       
  1319                 # we are on a msgstr plural
       
  1320                 self.process('MX', i)
       
  1321             elif line[:3] == '#, ':
       
  1322                 # we are on a flags line
       
  1323                 self.process('FL', i)
       
  1324             elif line[:2] == '# ' or line == '#':
       
  1325                 if line == '#': line = line + ' '
       
  1326                 # we are on a translator comment line
       
  1327                 self.process('TC', i)
       
  1328             elif line[:2] == '#.':
       
  1329                 # we are on a generated comment line
       
  1330                 self.process('GC', i)
       
  1331             elif line[:15] == '#| msgid_plural':
       
  1332                 # we are on a previous msgid_plural
       
  1333                 self.process('PP', i)
       
  1334             elif line[:8] == '#| msgid':
       
  1335                 self.process('PM', i)
       
  1336                 # we are on a previous msgid
       
  1337             elif line[:10] == '#| msgctxt':
       
  1338                 # we are on a previous msgctxt
       
  1339                 self.process('PC', i)
       
  1340             i = i+1
       
  1341 
       
  1342         if self.current_entry:
       
  1343             # since entries are added when another entry is found, we must add
       
  1344             # the last entry here (only if there are lines)
       
  1345             self.instance.append(self.current_entry)
       
  1346         # before returning the instance, check if there's metadata and if 
       
  1347         # so extract it in a dict
       
  1348         firstentry = self.instance[0]
       
  1349         if firstentry.msgid == '': # metadata found
       
  1350             # remove the entry
       
  1351             firstentry = self.instance.pop(0)
       
  1352             self.instance.metadata_is_fuzzy = firstentry.flags
       
  1353             key = None
       
  1354             for msg in firstentry.msgstr.splitlines():
       
  1355                 try:
       
  1356                     key, val = msg.split(':', 1)
       
  1357                     self.instance.metadata[key] = val.strip()
       
  1358                 except:
       
  1359                     if key is not None:
       
  1360                         self.instance.metadata[key] += '\n'+ msg.strip()
       
  1361         # close opened file
       
  1362         self.fhandle.close()
       
  1363         return self.instance
       
  1364 
       
  1365     def add(self, symbol, states, next_state):
       
  1366         """
       
  1367         Add a transition to the state machine.
       
  1368         Keywords arguments:
       
  1369 
       
  1370         symbol     -- string, the matched token (two chars symbol)
       
  1371         states     -- list, a list of states (two chars symbols)
       
  1372         next_state -- the next state the fsm will have after the action
       
  1373         """
       
  1374         for state in states:
       
  1375             action = getattr(self, 'handle_%s' % next_state.lower())
       
  1376             self.transitions[(symbol, state)] = (action, next_state)
       
  1377 
       
  1378     def process(self, symbol, linenum):
       
  1379         """
       
  1380         Process the transition corresponding to the current state and the
       
  1381         symbol provided.
       
  1382 
       
  1383         Keywords arguments:
       
  1384         symbol  -- string, the matched token (two chars symbol)
       
  1385         linenum -- integer, the current line number of the parsed file
       
  1386         """
       
  1387         try:
       
  1388             (action, state) = self.transitions[(symbol, self.current_state)]
       
  1389             if action():
       
  1390                 self.current_state = state
       
  1391         except Exception, exc:
       
  1392             raise IOError('Syntax error in po file (line %s)' % linenum)
       
  1393 
       
  1394     # state handlers
       
  1395 
       
  1396     def handle_he(self):
       
  1397         """Handle a header comment."""
       
  1398         if self.instance.header != '':
       
  1399             self.instance.header += '\n'
       
  1400         self.instance.header += self.current_token[2:]
       
  1401         return 1
       
  1402 
       
  1403     def handle_tc(self):
       
  1404         """Handle a translator comment."""
       
  1405         if self.current_state in ['MC', 'MS', 'MX']:
       
  1406             self.instance.append(self.current_entry)
       
  1407             self.current_entry = POEntry()
       
  1408         if self.current_entry.tcomment != '':
       
  1409             self.current_entry.tcomment += '\n'
       
  1410         self.current_entry.tcomment += self.current_token[2:]
       
  1411         return True
       
  1412 
       
  1413     def handle_gc(self):
       
  1414         """Handle a generated comment."""
       
  1415         if self.current_state in ['MC', 'MS', 'MX']:
       
  1416             self.instance.append(self.current_entry)
       
  1417             self.current_entry = POEntry()
       
  1418         if self.current_entry.comment != '':
       
  1419             self.current_entry.comment += '\n'
       
  1420         self.current_entry.comment += self.current_token[3:]
       
  1421         return True
       
  1422 
       
  1423     def handle_oc(self):
       
  1424         """Handle a file:num occurence."""
       
  1425         if self.current_state in ['MC', 'MS', 'MX']:
       
  1426             self.instance.append(self.current_entry)
       
  1427             self.current_entry = POEntry()
       
  1428         occurrences = self.current_token[3:].split()
       
  1429         for occurrence in occurrences:
       
  1430             if occurrence != '':
       
  1431                 try:
       
  1432                     fil, line = occurrence.split(':')
       
  1433                     if not line.isdigit():
       
  1434                         fil  = fil + line
       
  1435                         line = ''
       
  1436                     self.current_entry.occurrences.append((fil, line))
       
  1437                 except:
       
  1438                     self.current_entry.occurrences.append((occurrence, ''))
       
  1439         return True
       
  1440 
       
  1441     def handle_fl(self):
       
  1442         """Handle a flags line."""
       
  1443         if self.current_state in ['MC', 'MS', 'MX']:
       
  1444             self.instance.append(self.current_entry)
       
  1445             self.current_entry = POEntry()
       
  1446         self.current_entry.flags += self.current_token[3:].split(', ')
       
  1447         return True
       
  1448 
       
  1449     def handle_pp(self):
       
  1450         """Handle a previous msgid_plural line."""
       
  1451         if self.current_state in ['MC', 'MS', 'MX']:
       
  1452             self.instance.append(self.current_entry)
       
  1453             self.current_entry = POEntry()
       
  1454         self.current_entry.previous_msgid_plural = \
       
  1455             unescape(self.current_token[17:-1])
       
  1456         return True
       
  1457 
       
  1458     def handle_pm(self):
       
  1459         """Handle a previous msgid line."""
       
  1460         if self.current_state in ['MC', 'MS', 'MX']:
       
  1461             self.instance.append(self.current_entry)
       
  1462             self.current_entry = POEntry()
       
  1463         self.current_entry.previous_msgid = \
       
  1464             unescape(self.current_token[10:-1])
       
  1465         return True
       
  1466 
       
  1467     def handle_pc(self):
       
  1468         """Handle a previous msgctxt line."""
       
  1469         if self.current_state in ['MC', 'MS', 'MX']:
       
  1470             self.instance.append(self.current_entry)
       
  1471             self.current_entry = POEntry()
       
  1472         self.current_entry.previous_msgctxt = \
       
  1473             unescape(self.current_token[12:-1])
       
  1474         return True
       
  1475 
       
  1476     def handle_ct(self):
       
  1477         """Handle a msgctxt."""
       
  1478         if self.current_state in ['MC', 'MS', 'MX']:
       
  1479             self.instance.append(self.current_entry)
       
  1480             self.current_entry = POEntry()
       
  1481         self.current_entry.msgctxt = unescape(self.current_token[9:-1])
       
  1482         return True
       
  1483 
       
  1484     def handle_mi(self):
       
  1485         """Handle a msgid."""
       
  1486         if self.current_state in ['MC', 'MS', 'MX']:
       
  1487             self.instance.append(self.current_entry)
       
  1488             self.current_entry = POEntry()
       
  1489         self.current_entry.obsolete = self.entry_obsolete
       
  1490         self.current_entry.msgid = unescape(self.current_token[7:-1])
       
  1491         return True
       
  1492 
       
  1493     def handle_mp(self):
       
  1494         """Handle a msgid plural."""
       
  1495         self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
       
  1496         return True
       
  1497 
       
  1498     def handle_ms(self):
       
  1499         """Handle a msgstr."""
       
  1500         self.current_entry.msgstr = unescape(self.current_token[8:-1])
       
  1501         return True
       
  1502 
       
  1503     def handle_mx(self):
       
  1504         """Handle a msgstr plural."""
       
  1505         index, value = self.current_token[7], self.current_token[11:-1]
       
  1506         self.current_entry.msgstr_plural[index] = unescape(value)
       
  1507         self.msgstr_index = index
       
  1508         return True
       
  1509 
       
  1510     def handle_mc(self):
       
  1511         """Handle a msgid or msgstr continuation line."""
       
  1512         token = unescape(self.current_token[1:-1])
       
  1513         if self.current_state == 'CT':
       
  1514             typ = 'msgctxt'
       
  1515             self.current_entry.msgctxt += token
       
  1516         elif self.current_state == 'MI':
       
  1517             typ = 'msgid'
       
  1518             self.current_entry.msgid += token
       
  1519         elif self.current_state == 'MP':
       
  1520             typ = 'msgid_plural'
       
  1521             self.current_entry.msgid_plural += token
       
  1522         elif self.current_state == 'MS':
       
  1523             typ = 'msgstr'
       
  1524             self.current_entry.msgstr += token
       
  1525         elif self.current_state == 'MX':
       
  1526             typ = 'msgstr[%s]' % self.msgstr_index
       
  1527             self.current_entry.msgstr_plural[self.msgstr_index] += token
       
  1528         elif self.current_state == 'PP':
       
  1529             typ = 'previous_msgid_plural'
       
  1530             token = token[3:]
       
  1531             self.current_entry.previous_msgid_plural += token
       
  1532         elif self.current_state == 'PM':
       
  1533             typ = 'previous_msgid'
       
  1534             token = token[3:]
       
  1535             self.current_entry.previous_msgid += token
       
  1536         elif self.current_state == 'PC':
       
  1537             typ = 'previous_msgctxt'
       
  1538             token = token[3:]
       
  1539             self.current_entry.previous_msgctxt += token
       
  1540         if typ not in self.current_entry._multiline_str:
       
  1541             self.current_entry._multiline_str[typ] = token
       
  1542         else:
       
  1543             self.current_entry._multiline_str[typ] += "__POLIB__NL__" + token
       
  1544         # don't change the current state
       
  1545         return False
       
  1546 
       
  1547 # }}}
       
  1548 # class _MOFileParser {{{
       
  1549 
       
  1550 class _MOFileParser(object):
       
  1551     """
       
  1552     A class to parse binary mo files.
       
  1553     """
       
  1554     BIG_ENDIAN    = 0xde120495
       
  1555     LITTLE_ENDIAN = 0x950412de
       
  1556 
       
  1557     def __init__(self, fpath, *args, **kwargs):
       
  1558         """
       
  1559         Constructor.
       
  1560 
       
  1561         **Arguments**:
       
  1562           - *fpath*: string, path to the po file
       
  1563           - *encoding*: string, the encoding to use, defaults to
       
  1564             "default_encoding" global variable (optional),
       
  1565           - *check_for_duplicates*: whether to check for duplicate entries
       
  1566             when adding entries to the file, default: False (optional).
       
  1567         """
       
  1568         enc = kwargs.get('encoding', default_encoding)
       
  1569         check_dup = kwargs.get('check_for_duplicates', False)
       
  1570         self.fhandle = open(fpath, 'rb')
       
  1571         self.instance = MOFile(
       
  1572             fpath=fpath,
       
  1573             encoding=enc,
       
  1574             check_for_duplicates=check_dup
       
  1575         )
       
  1576 
       
  1577     def parse_magicnumber(self):
       
  1578         """
       
  1579         Parse the magic number and raise an exception if not valid.
       
  1580         """
       
  1581 
       
  1582     def parse(self):
       
  1583         """
       
  1584         Build the instance with the file handle provided in the
       
  1585         constructor.
       
  1586         """
       
  1587         magic_number = self._readbinary('<I', 4)
       
  1588         if magic_number == self.LITTLE_ENDIAN:
       
  1589             ii = '<II'
       
  1590         elif magic_number == self.BIG_ENDIAN:
       
  1591             ii = '>II'
       
  1592         else:
       
  1593             raise IOError('Invalid mo file, magic number is incorrect !')
       
  1594         self.instance.magic_number = magic_number
       
  1595         # parse the version number and the number of strings
       
  1596         self.instance.version, numofstrings = self._readbinary(ii, 8)
       
  1597         # original strings and translation strings hash table offset
       
  1598         msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
       
  1599         # move to msgid hash table and read length and offset of msgids
       
  1600         self.fhandle.seek(msgids_hash_offset)
       
  1601         msgids_index = []
       
  1602         for i in range(numofstrings):
       
  1603             msgids_index.append(self._readbinary(ii, 8))
       
  1604         # move to msgstr hash table and read length and offset of msgstrs
       
  1605         self.fhandle.seek(msgstrs_hash_offset)
       
  1606         msgstrs_index = []
       
  1607         for i in range(numofstrings):
       
  1608             msgstrs_index.append(self._readbinary(ii, 8))
       
  1609         # build entries
       
  1610         for i in range(numofstrings):
       
  1611             self.fhandle.seek(msgids_index[i][1])
       
  1612             msgid = self.fhandle.read(msgids_index[i][0])
       
  1613             self.fhandle.seek(msgstrs_index[i][1])
       
  1614             msgstr = self.fhandle.read(msgstrs_index[i][0])
       
  1615             if i == 0: # metadata
       
  1616                 raw_metadata, metadata = msgstr.split('\n'), {}
       
  1617                 for line in raw_metadata:
       
  1618                     tokens = line.split(':', 1)
       
  1619                     if tokens[0] != '':
       
  1620                         try:
       
  1621                             metadata[tokens[0]] = tokens[1].strip()
       
  1622                         except IndexError:
       
  1623                             metadata[tokens[0]] = ''
       
  1624                 self.instance.metadata = metadata
       
  1625                 continue
       
  1626             # test if we have a plural entry
       
  1627             msgid_tokens = msgid.split('\0')
       
  1628             if len(msgid_tokens) > 1:
       
  1629                 entry = MOEntry(
       
  1630                     msgid=msgid_tokens[0],
       
  1631                     msgid_plural=msgid_tokens[1],
       
  1632                     msgstr_plural=dict((k,v) for k,v in \
       
  1633                         enumerate(msgstr.split('\0')))
       
  1634                 )
       
  1635             else:
       
  1636                 entry = MOEntry(msgid=msgid, msgstr=msgstr)
       
  1637             self.instance.append(entry)
       
  1638         # close opened file
       
  1639         self.fhandle.close()
       
  1640         return self.instance
       
  1641 
       
  1642     def _readbinary(self, fmt, numbytes):
       
  1643         """
       
  1644         Private method that unpack n bytes of data using format <fmt>.
       
  1645         It returns a tuple or a mixed value if the tuple length is 1.
       
  1646         """
       
  1647         bytes = self.fhandle.read(numbytes)
       
  1648         tup = struct.unpack(fmt, bytes)
       
  1649         if len(tup) == 1:
       
  1650             return tup[0]
       
  1651         return tup
       
  1652 
       
  1653 # }}}
       
  1654 # __main__ {{{
       
  1655 
       
  1656 if __name__ == '__main__':
       
  1657     """
       
  1658     **Main function**::
       
  1659       - to **test** the module just run: *python polib.py [-v]*
       
  1660       - to **profile** the module: *python polib.py -p <some_pofile.po>*
       
  1661     """
       
  1662     import sys
       
  1663     if len(sys.argv) > 2 and sys.argv[1] == '-p':
       
  1664         def test(f):
       
  1665             if f.endswith('po'):
       
  1666                 p = pofile(f)
       
  1667             else:
       
  1668                 p = mofile(f)
       
  1669             s = unicode(p)
       
  1670         import profile
       
  1671         profile.run('test("'+sys.argv[2]+'")')
       
  1672     else:
       
  1673         import doctest
       
  1674         doctest.testmod()
       
  1675 
       
  1676 # }}}