i18n/polib.py
changeset 43076 2372284d9457
parent 40185 19fc5a986669
child 43977 04e0e0e73892
--- a/i18n/polib.py	Sat Oct 05 10:29:34 2019 -0400
+++ b/i18n/polib.py	Sun Oct 06 09:45:02 2019 -0400
@@ -17,8 +17,18 @@
 
 __author__ = 'David Jean Louis <izimobil@gmail.com>'
 __version__ = '1.0.7'
-__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
-           'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
+__all__ = [
+    'pofile',
+    'POFile',
+    'POEntry',
+    'mofile',
+    'MOFile',
+    'MOEntry',
+    'default_encoding',
+    'escape',
+    'unescape',
+    'detect_encoding',
+]
 
 import array
 import codecs
@@ -55,6 +65,7 @@
     def u(s):
         return unicode(s, "unicode_escape")
 
+
 else:
     PY3 = True
     text_type = str
@@ -64,6 +75,8 @@
 
     def u(s):
         return s
+
+
 # }}}
 # _pofile_or_mofile {{{
 
@@ -84,11 +97,13 @@
         f,
         encoding=enc,
         check_for_duplicates=kwargs.get('check_for_duplicates', False),
-        klass=kwargs.get('klass')
+        klass=kwargs.get('klass'),
     )
     instance = parser.parse()
     instance.wrapwidth = kwargs.get('wrapwidth', 78)
     return instance
+
+
 # }}}
 # _is_file {{{
 
@@ -107,6 +122,8 @@
         return os.path.exists(filename_or_contents)
     except (ValueError, UnicodeEncodeError):
         return False
+
+
 # }}}
 # function pofile() {{{
 
@@ -139,6 +156,8 @@
         instance).
     """
     return _pofile_or_mofile(pofile, 'pofile', **kwargs)
+
+
 # }}}
 # function mofile() {{{
 
@@ -172,6 +191,8 @@
         instance).
     """
     return _pofile_or_mofile(mofile, 'mofile', **kwargs)
+
+
 # }}}
 # function detect_encoding() {{{
 
@@ -229,6 +250,8 @@
                     return enc
         f.close()
     return default_encoding
+
+
 # }}}
 # function escape() {{{
 
@@ -238,11 +261,15 @@
     Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
     the given string ``st`` and returns it.
     """
-    return st.replace('\\', r'\\')\
-             .replace('\t', r'\t')\
-             .replace('\r', r'\r')\
-             .replace('\n', r'\n')\
-             .replace('\"', r'\"')
+    return (
+        st.replace('\\', r'\\')
+        .replace('\t', r'\t')
+        .replace('\r', r'\r')
+        .replace('\n', r'\n')
+        .replace('\"', r'\"')
+    )
+
+
 # }}}
 # function unescape() {{{
 
@@ -252,6 +279,7 @@
     Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
     the given string ``st`` and returns it.
     """
+
     def unescape_repl(m):
         m = m.group(1)
         if m == 'n':
@@ -263,7 +291,10 @@
         if m == '\\':
             return '\\'
         return m  # handles escaped double quote
+
     return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
+
+
 # }}}
 # class _BaseFile {{{
 
@@ -317,8 +348,9 @@
         Returns the unicode representation of the file.
         """
         ret = []
-        entries = [self.metadata_as_entry()] + \
-                  [e for e in self if not e.obsolete]
+        entries = [self.metadata_as_entry()] + [
+            e for e in self if not e.obsolete
+        ]
         for entry in entries:
             ret.append(entry.__unicode__(self.wrapwidth))
         for entry in self.obsolete_entries():
@@ -326,14 +358,17 @@
         ret = u('\n').join(ret)
 
         assert isinstance(ret, text_type)
-        #if type(ret) != text_type:
+        # if type(ret) != text_type:
         #    return unicode(ret, self.encoding)
         return ret
 
     if PY3:
+
         def __str__(self):
             return self.__unicode__()
+
     else:
+
         def __str__(self):
             """
             Returns the string representation of the file.
@@ -353,8 +388,10 @@
         ``entry``
             an instance of :class:`~polib._BaseEntry`.
         """
-        return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \
+        return (
+            self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt)
             is not None
+        )
 
     def __eq__(self, other):
         return str(self) == str(other)
@@ -439,8 +476,9 @@
         if self.fpath is None and fpath:
             self.fpath = fpath
 
-    def find(self, st, by='msgid', include_obsolete_entries=False,
-             msgctxt=False):
+    def find(
+        self, st, by='msgid', include_obsolete_entries=False, msgctxt=False
+    ):
         """
         Find the entry which msgid (or property identified by the ``by``
         argument) matches the string ``st``.
@@ -490,7 +528,7 @@
             'Content-Type',
             'Content-Transfer-Encoding',
             'Language',
-            'Plural-Forms'
+            'Plural-Forms',
         ]
         ordered_data = []
         for data in data_order:
@@ -524,10 +562,11 @@
                 return -1
             else:
                 return 0
+
         # add metadata entry
         entries.sort(key=lambda o: o.msgctxt or o.msgid)
         mentry = self.metadata_as_entry()
-        #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
+        # mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
         entries = [mentry] + entries
         entries_len = len(entries)
         ids, strs = b(''), b('')
@@ -578,8 +617,8 @@
             # start of value index
             7 * 4 + entries_len * 8,
             # size and offset of hash table, we don't use hash tables
-            0, keystart
-
+            0,
+            keystart,
         )
         if PY3 and sys.version_info.minor > 1:  # python 3.2 or superior
             output += array.array("i", offsets).tobytes()
@@ -597,6 +636,8 @@
         if isinstance(mixed, text_type):
             mixed = mixed.encode(self.encoding)
         return mixed
+
+
 # }}}
 # class POFile {{{
 
@@ -658,8 +699,11 @@
         """
         Convenience method that returns the list of untranslated entries.
         """
-        return [e for e in self if not e.translated() and not e.obsolete
-                and not 'fuzzy' in e.flags]
+        return [
+            e
+            for e in self
+            if not e.translated() and not e.obsolete and not 'fuzzy' in e.flags
+        ]
 
     def fuzzy_entries(self):
         """
@@ -703,6 +747,8 @@
         for entry in self:
             if entry.msgid not in refpot_msgids:
                 entry.obsolete = True
+
+
 # }}}
 # class MOFile {{{
 
@@ -713,8 +759,9 @@
     This class inherits the :class:`~polib._BaseFile` class and, by
     extension, the python ``list`` type.
     """
-    MAGIC = 0x950412de
-    MAGIC_SWAPPED = 0xde120495
+
+    MAGIC = 0x950412DE
+    MAGIC_SWAPPED = 0xDE120495
 
     def __init__(self, *args, **kwargs):
         """
@@ -776,6 +823,8 @@
         Convenience method to keep the same interface with POFile instances.
         """
         return []
+
+
 # }}}
 # class _BaseEntry {{{
 
@@ -831,14 +880,16 @@
         ret = []
         # write the msgctxt if any
         if self.msgctxt is not None:
-            ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
-                                   wrapwidth)
+            ret += self._str_field(
+                "msgctxt", delflag, "", self.msgctxt, wrapwidth
+            )
         # write the msgid
         ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
         # write the msgid_plural if any
         if self.msgid_plural:
-            ret += self._str_field("msgid_plural", delflag, "",
-                                   self.msgid_plural, wrapwidth)
+            ret += self._str_field(
+                "msgid_plural", delflag, "", self.msgid_plural, wrapwidth
+            )
         if self.msgstr_plural:
             # write the msgstr_plural if any
             msgstrs = self.msgstr_plural
@@ -847,20 +898,25 @@
             for index in keys:
                 msgstr = msgstrs[index]
                 plural_index = '[%s]' % index
-                ret += self._str_field("msgstr", delflag, plural_index, msgstr,
-                                       wrapwidth)
+                ret += self._str_field(
+                    "msgstr", delflag, plural_index, msgstr, wrapwidth
+                )
         else:
             # otherwise write the msgstr
-            ret += self._str_field("msgstr", delflag, "", self.msgstr,
-                                   wrapwidth)
+            ret += self._str_field(
+                "msgstr", delflag, "", self.msgstr, wrapwidth
+            )
         ret.append('')
         ret = u('\n').join(ret)
         return ret
 
     if PY3:
+
         def __str__(self):
             return self.__unicode__()
+
     else:
+
         def __str__(self):
             """
             Returns the string representation of the entry.
@@ -870,8 +926,7 @@
     def __eq__(self, other):
         return str(self) == str(other)
 
-    def _str_field(self, fieldname, delflag, plural_index, field,
-                   wrapwidth=78):
+    def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
         lines = field.splitlines(True)
         if len(lines) > 1:
             lines = [''] + lines  # start with initial empty line
@@ -888,23 +943,30 @@
             real_wrapwidth = wrapwidth - flength + specialchars_count
             if wrapwidth > 0 and len(field) > real_wrapwidth:
                 # Wrap the line but take field name into account
-                lines = [''] + [unescape(item) for item in wrap(
-                    escaped_field,
-                    wrapwidth - 2,  # 2 for quotes ""
-                    drop_whitespace=False,
-                    break_long_words=False
-                )]
+                lines = [''] + [
+                    unescape(item)
+                    for item in wrap(
+                        escaped_field,
+                        wrapwidth - 2,  # 2 for quotes ""
+                        drop_whitespace=False,
+                        break_long_words=False,
+                    )
+                ]
             else:
                 lines = [field]
         if fieldname.startswith('previous_'):
             # quick and dirty trick to get the real field name
             fieldname = fieldname[9:]
 
-        ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
-                                escape(lines.pop(0)))]
+        ret = [
+            '%s%s%s "%s"'
+            % (delflag, fieldname, plural_index, escape(lines.pop(0)))
+        ]
         for line in lines:
             ret.append('%s"%s"' % (delflag, escape(line)))
         return ret
+
+
 # }}}
 # class POEntry {{{
 
@@ -972,7 +1034,7 @@
                             wrapwidth,
                             initial_indent=c[1],
                             subsequent_indent=c[1],
-                            break_long_words=False
+                            break_long_words=False,
                         )
                     else:
                         ret.append('%s%s' % (c[1], comment))
@@ -991,13 +1053,16 @@
                 # what we want for filenames, so the dirty hack is to
                 # temporally replace hyphens with a char that a file cannot
                 # contain, like "*"
-                ret += [l.replace('*', '-') for l in wrap(
-                    filestr.replace('-', '*'),
-                    wrapwidth,
-                    initial_indent='#: ',
-                    subsequent_indent='#: ',
-                    break_long_words=False
-                )]
+                ret += [
+                    l.replace('*', '-')
+                    for l in wrap(
+                        filestr.replace('-', '*'),
+                        wrapwidth,
+                        initial_indent='#: ',
+                        subsequent_indent='#: ',
+                        break_long_words=False,
+                    )
+                ]
             else:
                 ret.append('#: ' + filestr)
 
@@ -1006,8 +1071,7 @@
             ret.append('#, %s' % ', '.join(self.flags))
 
         # previous context and previous msgid/msgid_plural
-        fields = ['previous_msgctxt', 'previous_msgid',
-                  'previous_msgid_plural']
+        fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
         for f in fields:
             val = getattr(self, f)
             if val:
@@ -1017,7 +1081,7 @@
         ret = u('\n').join(ret)
 
         assert isinstance(ret, text_type)
-        #if type(ret) != types.UnicodeType:
+        # if type(ret) != types.UnicodeType:
         #    return unicode(ret, self.encoding)
         return ret
 
@@ -1131,6 +1195,8 @@
 
     def __hash__(self):
         return hash((self.msgid, self.msgstr))
+
+
 # }}}
 # class MOEntry {{{
 
@@ -1139,6 +1205,7 @@
     """
     Represents a mo file entry.
     """
+
     def __init__(self, *args, **kwargs):
         """
         Constructor, accepts the following keyword arguments,
@@ -1168,6 +1235,7 @@
     def __hash__(self):
         return hash((self.msgid, self.msgstr))
 
+
 # }}}
 # class _POFileParser {{{
 
@@ -1211,7 +1279,7 @@
         self.instance = klass(
             pofile=pofile,
             encoding=enc,
-            check_for_duplicates=kwargs.get('check_for_duplicates', False)
+            check_for_duplicates=kwargs.get('check_for_duplicates', False),
         )
         self.transitions = {}
         self.current_line = 0
@@ -1238,25 +1306,61 @@
         #     * MS: a msgstr
         #     * MX: a msgstr plural
         #     * MC: a msgid or msgstr continuation line
-        all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc',
-               'ms', 'mp', 'mx', 'mi']
+        all = [
+            'st',
+            'he',
+            'gc',
+            'oc',
+            'fl',
+            'ct',
+            'pc',
+            'pm',
+            'pp',
+            'tc',
+            'ms',
+            'mp',
+            'mx',
+            'mi',
+        ]
 
-        self.add('tc', ['st', 'he'],                                     'he')
-        self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
-                        'mp', 'mx', 'mi'],                               'tc')
-        self.add('gc', all,                                              'gc')
-        self.add('oc', all,                                              'oc')
-        self.add('fl', all,                                              'fl')
-        self.add('pc', all,                                              'pc')
-        self.add('pm', all,                                              'pm')
-        self.add('pp', all,                                              'pp')
-        self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
-                        'pp', 'ms', 'mx'],                               'ct')
-        self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
-                 'pm', 'pp', 'ms', 'mx'],                                'mi')
-        self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'],             'mp')
-        self.add('ms', ['mi', 'mp', 'tc'],                               'ms')
-        self.add('mx', ['mi', 'mx', 'mp', 'tc'],                         'mx')
+        self.add('tc', ['st', 'he'], 'he')
+        self.add(
+            'tc',
+            ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', 'mp', 'mx', 'mi'],
+            'tc',
+        )
+        self.add('gc', all, 'gc')
+        self.add('oc', all, 'oc')
+        self.add('fl', all, 'fl')
+        self.add('pc', all, 'pc')
+        self.add('pm', all, 'pm')
+        self.add('pp', all, 'pp')
+        self.add(
+            'ct',
+            ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', 'mx'],
+            'ct',
+        )
+        self.add(
+            'mi',
+            [
+                'st',
+                'he',
+                'gc',
+                'oc',
+                'fl',
+                'ct',
+                'tc',
+                'pc',
+                'pm',
+                'pp',
+                'ms',
+                'mx',
+            ],
+            'mi',
+        )
+        self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
+        self.add('ms', ['mi', 'mp', 'tc'], 'ms')
+        self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
         self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
 
     def parse(self):
@@ -1300,11 +1404,13 @@
             # Take care of keywords like
             # msgid, msgid_plural, msgctxt & msgstr.
             if tokens[0] in keywords and nb_tokens > 1:
-                line = line[len(tokens[0]):].lstrip()
+                line = line[len(tokens[0]) :].lstrip()
                 if re.search(r'([^\\]|^)"', line[1:-1]):
-                    raise IOError('Syntax error in po file %s (line %s): '
-                                  'unescaped double quote found' %
-                                  (self.instance.fpath, self.current_line))
+                    raise IOError(
+                        'Syntax error in po file %s (line %s): '
+                        'unescaped double quote found'
+                        % (self.instance.fpath, self.current_line)
+                    )
                 self.current_token = line
                 self.process(keywords[tokens[0]])
                 continue
@@ -1320,9 +1426,11 @@
             elif line[:1] == '"':
                 # we are on a continuation line
                 if re.search(r'([^\\]|^)"', line[1:-1]):
-                    raise IOError('Syntax error in po file %s (line %s): '
-                                  'unescaped double quote found' %
-                                  (self.instance.fpath, self.current_line))
+                    raise IOError(
+                        'Syntax error in po file %s (line %s): '
+                        'unescaped double quote found'
+                        % (self.instance.fpath, self.current_line)
+                    )
                 self.process('mc')
 
             elif line[:7] == 'msgstr[':
@@ -1349,8 +1457,10 @@
 
             elif tokens[0] == '#|':
                 if nb_tokens <= 1:
-                    raise IOError('Syntax error in po file %s (line %s)' %
-                                  (self.instance.fpath, self.current_line))
+                    raise IOError(
+                        'Syntax error in po file %s (line %s)'
+                        % (self.instance.fpath, self.current_line)
+                    )
 
                 # Remove the marker and any whitespace right after that.
                 line = line[2:].lstrip()
@@ -1363,30 +1473,38 @@
 
                 if nb_tokens == 2:
                     # Invalid continuation line.
-                    raise IOError('Syntax error in po file %s (line %s): '
-                                  'invalid continuation line' %
-                                  (self.instance.fpath, self.current_line))
+                    raise IOError(
+                        'Syntax error in po file %s (line %s): '
+                        'invalid continuation line'
+                        % (self.instance.fpath, self.current_line)
+                    )
 
                 # we are on a "previous translation" comment line,
                 if tokens[1] not in prev_keywords:
                     # Unknown keyword in previous translation comment.
-                    raise IOError('Syntax error in po file %s (line %s): '
-                                  'unknown keyword %s' %
-                                  (self.instance.fpath, self.current_line,
-                                   tokens[1]))
+                    raise IOError(
+                        'Syntax error in po file %s (line %s): '
+                        'unknown keyword %s'
+                        % (self.instance.fpath, self.current_line, tokens[1])
+                    )
 
                 # Remove the keyword and any whitespace
                 # between it and the starting quote.
-                line = line[len(tokens[1]):].lstrip()
+                line = line[len(tokens[1]) :].lstrip()
                 self.current_token = line
                 self.process(prev_keywords[tokens[1]])
 
             else:
-                raise IOError('Syntax error in po file %s (line %s)' %
-                              (self.instance.fpath, self.current_line))
+                raise IOError(
+                    'Syntax error in po file %s (line %s)'
+                    % (self.instance.fpath, self.current_line)
+                )
 
-        if self.current_entry and len(tokens) > 0 and \
-           not tokens[0].startswith('#'):
+        if (
+            self.current_entry
+            and len(tokens) > 0
+            and not tokens[0].startswith('#')
+        ):
             # since entries are added when another entry is found, we must add
             # the last entry here (only if there are lines). Trailing comments
             # are ignored
@@ -1449,8 +1567,9 @@
             if action():
                 self.current_state = state
         except Exception:
-            raise IOError('Syntax error in po file (line %s)' %
-                          self.current_line)
+            raise IOError(
+                'Syntax error in po file (line %s)' % self.current_line
+            )
 
     # state handlers
 
@@ -1507,8 +1626,9 @@
         if self.current_state in ['mc', 'ms', 'mx']:
             self.instance.append(self.current_entry)
             self.current_entry = POEntry(linenum=self.current_line)
-        self.current_entry.flags += [c.strip() for c in
-                                     self.current_token[3:].split(',')]
+        self.current_entry.flags += [
+            c.strip() for c in self.current_token[3:].split(',')
+        ]
         return True
 
     def handle_pp(self):
@@ -1516,8 +1636,9 @@
         if self.current_state in ['mc', 'ms', 'mx']:
             self.instance.append(self.current_entry)
             self.current_entry = POEntry(linenum=self.current_line)
-        self.current_entry.previous_msgid_plural = \
-            unescape(self.current_token[1:-1])
+        self.current_entry.previous_msgid_plural = unescape(
+            self.current_token[1:-1]
+        )
         return True
 
     def handle_pm(self):
@@ -1525,8 +1646,7 @@
         if self.current_state in ['mc', 'ms', 'mx']:
             self.instance.append(self.current_entry)
             self.current_entry = POEntry(linenum=self.current_line)
-        self.current_entry.previous_msgid = \
-            unescape(self.current_token[1:-1])
+        self.current_entry.previous_msgid = unescape(self.current_token[1:-1])
         return True
 
     def handle_pc(self):
@@ -1534,8 +1654,7 @@
         if self.current_state in ['mc', 'ms', 'mx']:
             self.instance.append(self.current_entry)
             self.current_entry = POEntry(linenum=self.current_line)
-        self.current_entry.previous_msgctxt = \
-            unescape(self.current_token[1:-1])
+        self.current_entry.previous_msgctxt = unescape(self.current_token[1:-1])
         return True
 
     def handle_ct(self):
@@ -1568,7 +1687,7 @@
     def handle_mx(self):
         """Handle a msgstr plural."""
         index = self.current_token[7]
-        value = self.current_token[self.current_token.find('"') + 1:-1]
+        value = self.current_token[self.current_token.find('"') + 1 : -1]
         self.current_entry.msgstr_plural[int(index)] = unescape(value)
         self.msgstr_index = int(index)
         return True
@@ -1594,6 +1713,8 @@
             self.current_entry.previous_msgctxt += token
         # don't change the current state
         return False
+
+
 # }}}
 # class _MOFileParser {{{
 
@@ -1628,7 +1749,7 @@
         self.instance = klass(
             fpath=mofile,
             encoding=kwargs.get('encoding', default_encoding),
-            check_for_duplicates=kwargs.get('check_for_duplicates', False)
+            check_for_duplicates=kwargs.get('check_for_duplicates', False),
         )
 
     def __del__(self):
@@ -1699,8 +1820,9 @@
                 entry = self._build_entry(
                     msgid=msgid_tokens[0],
                     msgid_plural=msgid_tokens[1],
-                    msgstr_plural=dict((k, v) for k, v in
-                                       enumerate(msgstr.split(b('\0'))))
+                    msgstr_plural=dict(
+                        (k, v) for k, v in enumerate(msgstr.split(b('\0')))
+                    ),
                 )
             else:
                 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
@@ -1709,8 +1831,9 @@
         self.fhandle.close()
         return self.instance
 
-    def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
-                     msgstr_plural=None):
+    def _build_entry(
+        self, msgid, msgstr=None, msgid_plural=None, msgstr_plural=None
+    ):
         msgctxt_msgid = msgid.split(b('\x04'))
         encoding = self.instance.encoding
         if len(msgctxt_msgid) > 1:
@@ -1740,6 +1863,8 @@
         if len(tup) == 1:
             return tup[0]
         return tup
+
+
 # }}}
 # class TextWrapper {{{
 
@@ -1749,6 +1874,7 @@
     Subclass of textwrap.TextWrapper that backport the
     drop_whitespace option.
     """
+
     def __init__(self, *args, **kwargs):
         drop_whitespace = kwargs.pop('drop_whitespace', True)
         textwrap.TextWrapper.__init__(self, *args, **kwargs)
@@ -1823,6 +1949,8 @@
                 lines.append(indent + ''.join(cur_line))
 
         return lines
+
+
 # }}}
 # function wrap() {{{
 
@@ -1835,4 +1963,5 @@
         return TextWrapper(width=width, **kwargs).wrap(text)
     return textwrap.wrap(text, width=width, **kwargs)
 
+
 # }}}