--- a/i18n/polib.py Sat Oct 05 10:29:34 2019 -0400
+++ b/i18n/polib.py Sun Oct 06 09:45:02 2019 -0400
@@ -17,8 +17,18 @@
__author__ = 'David Jean Louis <izimobil@gmail.com>'
__version__ = '1.0.7'
-__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
- 'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
+__all__ = [
+ 'pofile',
+ 'POFile',
+ 'POEntry',
+ 'mofile',
+ 'MOFile',
+ 'MOEntry',
+ 'default_encoding',
+ 'escape',
+ 'unescape',
+ 'detect_encoding',
+]
import array
import codecs
@@ -55,6 +65,7 @@
def u(s):
return unicode(s, "unicode_escape")
+
else:
PY3 = True
text_type = str
@@ -64,6 +75,8 @@
def u(s):
return s
+
+
# }}}
# _pofile_or_mofile {{{
@@ -84,11 +97,13 @@
f,
encoding=enc,
check_for_duplicates=kwargs.get('check_for_duplicates', False),
- klass=kwargs.get('klass')
+ klass=kwargs.get('klass'),
)
instance = parser.parse()
instance.wrapwidth = kwargs.get('wrapwidth', 78)
return instance
+
+
# }}}
# _is_file {{{
@@ -107,6 +122,8 @@
return os.path.exists(filename_or_contents)
except (ValueError, UnicodeEncodeError):
return False
+
+
# }}}
# function pofile() {{{
@@ -139,6 +156,8 @@
instance).
"""
return _pofile_or_mofile(pofile, 'pofile', **kwargs)
+
+
# }}}
# function mofile() {{{
@@ -172,6 +191,8 @@
instance).
"""
return _pofile_or_mofile(mofile, 'mofile', **kwargs)
+
+
# }}}
# function detect_encoding() {{{
@@ -229,6 +250,8 @@
return enc
f.close()
return default_encoding
+
+
# }}}
# function escape() {{{
@@ -238,11 +261,15 @@
Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
the given string ``st`` and returns it.
"""
- return st.replace('\\', r'\\')\
- .replace('\t', r'\t')\
- .replace('\r', r'\r')\
- .replace('\n', r'\n')\
- .replace('\"', r'\"')
+ return (
+ st.replace('\\', r'\\')
+ .replace('\t', r'\t')
+ .replace('\r', r'\r')
+ .replace('\n', r'\n')
+ .replace('\"', r'\"')
+ )
+
+
# }}}
# function unescape() {{{
@@ -252,6 +279,7 @@
Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
the given string ``st`` and returns it.
"""
+
def unescape_repl(m):
m = m.group(1)
if m == 'n':
@@ -263,7 +291,10 @@
if m == '\\':
return '\\'
return m # handles escaped double quote
+
return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
+
+
# }}}
# class _BaseFile {{{
@@ -317,8 +348,9 @@
Returns the unicode representation of the file.
"""
ret = []
- entries = [self.metadata_as_entry()] + \
- [e for e in self if not e.obsolete]
+ entries = [self.metadata_as_entry()] + [
+ e for e in self if not e.obsolete
+ ]
for entry in entries:
ret.append(entry.__unicode__(self.wrapwidth))
for entry in self.obsolete_entries():
@@ -326,14 +358,17 @@
ret = u('\n').join(ret)
assert isinstance(ret, text_type)
- #if type(ret) != text_type:
+ # if type(ret) != text_type:
# return unicode(ret, self.encoding)
return ret
if PY3:
+
def __str__(self):
return self.__unicode__()
+
else:
+
def __str__(self):
"""
Returns the string representation of the file.
@@ -353,8 +388,10 @@
``entry``
an instance of :class:`~polib._BaseEntry`.
"""
- return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \
+ return (
+ self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt)
is not None
+ )
def __eq__(self, other):
return str(self) == str(other)
@@ -439,8 +476,9 @@
if self.fpath is None and fpath:
self.fpath = fpath
- def find(self, st, by='msgid', include_obsolete_entries=False,
- msgctxt=False):
+ def find(
+ self, st, by='msgid', include_obsolete_entries=False, msgctxt=False
+ ):
"""
Find the entry which msgid (or property identified by the ``by``
argument) matches the string ``st``.
@@ -490,7 +528,7 @@
'Content-Type',
'Content-Transfer-Encoding',
'Language',
- 'Plural-Forms'
+ 'Plural-Forms',
]
ordered_data = []
for data in data_order:
@@ -524,10 +562,11 @@
return -1
else:
return 0
+
# add metadata entry
entries.sort(key=lambda o: o.msgctxt or o.msgid)
mentry = self.metadata_as_entry()
- #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
+ # mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
entries = [mentry] + entries
entries_len = len(entries)
ids, strs = b(''), b('')
@@ -578,8 +617,8 @@
# start of value index
7 * 4 + entries_len * 8,
# size and offset of hash table, we don't use hash tables
- 0, keystart
-
+ 0,
+ keystart,
)
if PY3 and sys.version_info.minor > 1: # python 3.2 or superior
output += array.array("i", offsets).tobytes()
@@ -597,6 +636,8 @@
if isinstance(mixed, text_type):
mixed = mixed.encode(self.encoding)
return mixed
+
+
# }}}
# class POFile {{{
@@ -658,8 +699,11 @@
"""
Convenience method that returns the list of untranslated entries.
"""
- return [e for e in self if not e.translated() and not e.obsolete
- and not 'fuzzy' in e.flags]
+ return [
+ e
+ for e in self
+ if not e.translated() and not e.obsolete and not 'fuzzy' in e.flags
+ ]
def fuzzy_entries(self):
"""
@@ -703,6 +747,8 @@
for entry in self:
if entry.msgid not in refpot_msgids:
entry.obsolete = True
+
+
# }}}
# class MOFile {{{
@@ -713,8 +759,9 @@
This class inherits the :class:`~polib._BaseFile` class and, by
extension, the python ``list`` type.
"""
- MAGIC = 0x950412de
- MAGIC_SWAPPED = 0xde120495
+
+ MAGIC = 0x950412DE
+ MAGIC_SWAPPED = 0xDE120495
def __init__(self, *args, **kwargs):
"""
@@ -776,6 +823,8 @@
Convenience method to keep the same interface with POFile instances.
"""
return []
+
+
# }}}
# class _BaseEntry {{{
@@ -831,14 +880,16 @@
ret = []
# write the msgctxt if any
if self.msgctxt is not None:
- ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
- wrapwidth)
+ ret += self._str_field(
+ "msgctxt", delflag, "", self.msgctxt, wrapwidth
+ )
# write the msgid
ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
# write the msgid_plural if any
if self.msgid_plural:
- ret += self._str_field("msgid_plural", delflag, "",
- self.msgid_plural, wrapwidth)
+ ret += self._str_field(
+ "msgid_plural", delflag, "", self.msgid_plural, wrapwidth
+ )
if self.msgstr_plural:
# write the msgstr_plural if any
msgstrs = self.msgstr_plural
@@ -847,20 +898,25 @@
for index in keys:
msgstr = msgstrs[index]
plural_index = '[%s]' % index
- ret += self._str_field("msgstr", delflag, plural_index, msgstr,
- wrapwidth)
+ ret += self._str_field(
+ "msgstr", delflag, plural_index, msgstr, wrapwidth
+ )
else:
# otherwise write the msgstr
- ret += self._str_field("msgstr", delflag, "", self.msgstr,
- wrapwidth)
+ ret += self._str_field(
+ "msgstr", delflag, "", self.msgstr, wrapwidth
+ )
ret.append('')
ret = u('\n').join(ret)
return ret
if PY3:
+
def __str__(self):
return self.__unicode__()
+
else:
+
def __str__(self):
"""
Returns the string representation of the entry.
@@ -870,8 +926,7 @@
def __eq__(self, other):
return str(self) == str(other)
- def _str_field(self, fieldname, delflag, plural_index, field,
- wrapwidth=78):
+ def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
lines = field.splitlines(True)
if len(lines) > 1:
lines = [''] + lines # start with initial empty line
@@ -888,23 +943,30 @@
real_wrapwidth = wrapwidth - flength + specialchars_count
if wrapwidth > 0 and len(field) > real_wrapwidth:
# Wrap the line but take field name into account
- lines = [''] + [unescape(item) for item in wrap(
- escaped_field,
- wrapwidth - 2, # 2 for quotes ""
- drop_whitespace=False,
- break_long_words=False
- )]
+ lines = [''] + [
+ unescape(item)
+ for item in wrap(
+ escaped_field,
+ wrapwidth - 2, # 2 for quotes ""
+ drop_whitespace=False,
+ break_long_words=False,
+ )
+ ]
else:
lines = [field]
if fieldname.startswith('previous_'):
# quick and dirty trick to get the real field name
fieldname = fieldname[9:]
- ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
- escape(lines.pop(0)))]
+ ret = [
+ '%s%s%s "%s"'
+ % (delflag, fieldname, plural_index, escape(lines.pop(0)))
+ ]
for line in lines:
ret.append('%s"%s"' % (delflag, escape(line)))
return ret
+
+
# }}}
# class POEntry {{{
@@ -972,7 +1034,7 @@
wrapwidth,
initial_indent=c[1],
subsequent_indent=c[1],
- break_long_words=False
+ break_long_words=False,
)
else:
ret.append('%s%s' % (c[1], comment))
@@ -991,13 +1053,16 @@
# what we want for filenames, so the dirty hack is to
# temporally replace hyphens with a char that a file cannot
# contain, like "*"
- ret += [l.replace('*', '-') for l in wrap(
- filestr.replace('-', '*'),
- wrapwidth,
- initial_indent='#: ',
- subsequent_indent='#: ',
- break_long_words=False
- )]
+ ret += [
+ l.replace('*', '-')
+ for l in wrap(
+ filestr.replace('-', '*'),
+ wrapwidth,
+ initial_indent='#: ',
+ subsequent_indent='#: ',
+ break_long_words=False,
+ )
+ ]
else:
ret.append('#: ' + filestr)
@@ -1006,8 +1071,7 @@
ret.append('#, %s' % ', '.join(self.flags))
# previous context and previous msgid/msgid_plural
- fields = ['previous_msgctxt', 'previous_msgid',
- 'previous_msgid_plural']
+ fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
for f in fields:
val = getattr(self, f)
if val:
@@ -1017,7 +1081,7 @@
ret = u('\n').join(ret)
assert isinstance(ret, text_type)
- #if type(ret) != types.UnicodeType:
+ # if type(ret) != types.UnicodeType:
# return unicode(ret, self.encoding)
return ret
@@ -1131,6 +1195,8 @@
def __hash__(self):
return hash((self.msgid, self.msgstr))
+
+
# }}}
# class MOEntry {{{
@@ -1139,6 +1205,7 @@
"""
Represents a mo file entry.
"""
+
def __init__(self, *args, **kwargs):
"""
Constructor, accepts the following keyword arguments,
@@ -1168,6 +1235,7 @@
def __hash__(self):
return hash((self.msgid, self.msgstr))
+
# }}}
# class _POFileParser {{{
@@ -1211,7 +1279,7 @@
self.instance = klass(
pofile=pofile,
encoding=enc,
- check_for_duplicates=kwargs.get('check_for_duplicates', False)
+ check_for_duplicates=kwargs.get('check_for_duplicates', False),
)
self.transitions = {}
self.current_line = 0
@@ -1238,25 +1306,61 @@
# * MS: a msgstr
# * MX: a msgstr plural
# * MC: a msgid or msgstr continuation line
- all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc',
- 'ms', 'mp', 'mx', 'mi']
+ all = [
+ 'st',
+ 'he',
+ 'gc',
+ 'oc',
+ 'fl',
+ 'ct',
+ 'pc',
+ 'pm',
+ 'pp',
+ 'tc',
+ 'ms',
+ 'mp',
+ 'mx',
+ 'mi',
+ ]
- self.add('tc', ['st', 'he'], 'he')
- self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
- 'mp', 'mx', 'mi'], 'tc')
- self.add('gc', all, 'gc')
- self.add('oc', all, 'oc')
- self.add('fl', all, 'fl')
- self.add('pc', all, 'pc')
- self.add('pm', all, 'pm')
- self.add('pp', all, 'pp')
- self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
- 'pp', 'ms', 'mx'], 'ct')
- self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
- 'pm', 'pp', 'ms', 'mx'], 'mi')
- self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
- self.add('ms', ['mi', 'mp', 'tc'], 'ms')
- self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
+ self.add('tc', ['st', 'he'], 'he')
+ self.add(
+ 'tc',
+ ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', 'mp', 'mx', 'mi'],
+ 'tc',
+ )
+ self.add('gc', all, 'gc')
+ self.add('oc', all, 'oc')
+ self.add('fl', all, 'fl')
+ self.add('pc', all, 'pc')
+ self.add('pm', all, 'pm')
+ self.add('pp', all, 'pp')
+ self.add(
+ 'ct',
+ ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', 'mx'],
+ 'ct',
+ )
+ self.add(
+ 'mi',
+ [
+ 'st',
+ 'he',
+ 'gc',
+ 'oc',
+ 'fl',
+ 'ct',
+ 'tc',
+ 'pc',
+ 'pm',
+ 'pp',
+ 'ms',
+ 'mx',
+ ],
+ 'mi',
+ )
+ self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
+ self.add('ms', ['mi', 'mp', 'tc'], 'ms')
+ self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
def parse(self):
@@ -1300,11 +1404,13 @@
# Take care of keywords like
# msgid, msgid_plural, msgctxt & msgstr.
if tokens[0] in keywords and nb_tokens > 1:
- line = line[len(tokens[0]):].lstrip()
+ line = line[len(tokens[0]) :].lstrip()
if re.search(r'([^\\]|^)"', line[1:-1]):
- raise IOError('Syntax error in po file %s (line %s): '
- 'unescaped double quote found' %
- (self.instance.fpath, self.current_line))
+ raise IOError(
+ 'Syntax error in po file %s (line %s): '
+ 'unescaped double quote found'
+ % (self.instance.fpath, self.current_line)
+ )
self.current_token = line
self.process(keywords[tokens[0]])
continue
@@ -1320,9 +1426,11 @@
elif line[:1] == '"':
# we are on a continuation line
if re.search(r'([^\\]|^)"', line[1:-1]):
- raise IOError('Syntax error in po file %s (line %s): '
- 'unescaped double quote found' %
- (self.instance.fpath, self.current_line))
+ raise IOError(
+ 'Syntax error in po file %s (line %s): '
+ 'unescaped double quote found'
+ % (self.instance.fpath, self.current_line)
+ )
self.process('mc')
elif line[:7] == 'msgstr[':
@@ -1349,8 +1457,10 @@
elif tokens[0] == '#|':
if nb_tokens <= 1:
- raise IOError('Syntax error in po file %s (line %s)' %
- (self.instance.fpath, self.current_line))
+ raise IOError(
+ 'Syntax error in po file %s (line %s)'
+ % (self.instance.fpath, self.current_line)
+ )
# Remove the marker and any whitespace right after that.
line = line[2:].lstrip()
@@ -1363,30 +1473,38 @@
if nb_tokens == 2:
# Invalid continuation line.
- raise IOError('Syntax error in po file %s (line %s): '
- 'invalid continuation line' %
- (self.instance.fpath, self.current_line))
+ raise IOError(
+ 'Syntax error in po file %s (line %s): '
+ 'invalid continuation line'
+ % (self.instance.fpath, self.current_line)
+ )
# we are on a "previous translation" comment line,
if tokens[1] not in prev_keywords:
# Unknown keyword in previous translation comment.
- raise IOError('Syntax error in po file %s (line %s): '
- 'unknown keyword %s' %
- (self.instance.fpath, self.current_line,
- tokens[1]))
+ raise IOError(
+ 'Syntax error in po file %s (line %s): '
+ 'unknown keyword %s'
+ % (self.instance.fpath, self.current_line, tokens[1])
+ )
# Remove the keyword and any whitespace
# between it and the starting quote.
- line = line[len(tokens[1]):].lstrip()
+ line = line[len(tokens[1]) :].lstrip()
self.current_token = line
self.process(prev_keywords[tokens[1]])
else:
- raise IOError('Syntax error in po file %s (line %s)' %
- (self.instance.fpath, self.current_line))
+ raise IOError(
+ 'Syntax error in po file %s (line %s)'
+ % (self.instance.fpath, self.current_line)
+ )
- if self.current_entry and len(tokens) > 0 and \
- not tokens[0].startswith('#'):
+ if (
+ self.current_entry
+ and len(tokens) > 0
+ and not tokens[0].startswith('#')
+ ):
# since entries are added when another entry is found, we must add
# the last entry here (only if there are lines). Trailing comments
# are ignored
@@ -1449,8 +1567,9 @@
if action():
self.current_state = state
except Exception:
- raise IOError('Syntax error in po file (line %s)' %
- self.current_line)
+ raise IOError(
+ 'Syntax error in po file (line %s)' % self.current_line
+ )
# state handlers
@@ -1507,8 +1626,9 @@
if self.current_state in ['mc', 'ms', 'mx']:
self.instance.append(self.current_entry)
self.current_entry = POEntry(linenum=self.current_line)
- self.current_entry.flags += [c.strip() for c in
- self.current_token[3:].split(',')]
+ self.current_entry.flags += [
+ c.strip() for c in self.current_token[3:].split(',')
+ ]
return True
def handle_pp(self):
@@ -1516,8 +1636,9 @@
if self.current_state in ['mc', 'ms', 'mx']:
self.instance.append(self.current_entry)
self.current_entry = POEntry(linenum=self.current_line)
- self.current_entry.previous_msgid_plural = \
- unescape(self.current_token[1:-1])
+ self.current_entry.previous_msgid_plural = unescape(
+ self.current_token[1:-1]
+ )
return True
def handle_pm(self):
@@ -1525,8 +1646,7 @@
if self.current_state in ['mc', 'ms', 'mx']:
self.instance.append(self.current_entry)
self.current_entry = POEntry(linenum=self.current_line)
- self.current_entry.previous_msgid = \
- unescape(self.current_token[1:-1])
+ self.current_entry.previous_msgid = unescape(self.current_token[1:-1])
return True
def handle_pc(self):
@@ -1534,8 +1654,7 @@
if self.current_state in ['mc', 'ms', 'mx']:
self.instance.append(self.current_entry)
self.current_entry = POEntry(linenum=self.current_line)
- self.current_entry.previous_msgctxt = \
- unescape(self.current_token[1:-1])
+ self.current_entry.previous_msgctxt = unescape(self.current_token[1:-1])
return True
def handle_ct(self):
@@ -1568,7 +1687,7 @@
def handle_mx(self):
"""Handle a msgstr plural."""
index = self.current_token[7]
- value = self.current_token[self.current_token.find('"') + 1:-1]
+ value = self.current_token[self.current_token.find('"') + 1 : -1]
self.current_entry.msgstr_plural[int(index)] = unescape(value)
self.msgstr_index = int(index)
return True
@@ -1594,6 +1713,8 @@
self.current_entry.previous_msgctxt += token
# don't change the current state
return False
+
+
# }}}
# class _MOFileParser {{{
@@ -1628,7 +1749,7 @@
self.instance = klass(
fpath=mofile,
encoding=kwargs.get('encoding', default_encoding),
- check_for_duplicates=kwargs.get('check_for_duplicates', False)
+ check_for_duplicates=kwargs.get('check_for_duplicates', False),
)
def __del__(self):
@@ -1699,8 +1820,9 @@
entry = self._build_entry(
msgid=msgid_tokens[0],
msgid_plural=msgid_tokens[1],
- msgstr_plural=dict((k, v) for k, v in
- enumerate(msgstr.split(b('\0'))))
+ msgstr_plural=dict(
+ (k, v) for k, v in enumerate(msgstr.split(b('\0')))
+ ),
)
else:
entry = self._build_entry(msgid=msgid, msgstr=msgstr)
@@ -1709,8 +1831,9 @@
self.fhandle.close()
return self.instance
- def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
- msgstr_plural=None):
+ def _build_entry(
+ self, msgid, msgstr=None, msgid_plural=None, msgstr_plural=None
+ ):
msgctxt_msgid = msgid.split(b('\x04'))
encoding = self.instance.encoding
if len(msgctxt_msgid) > 1:
@@ -1740,6 +1863,8 @@
if len(tup) == 1:
return tup[0]
return tup
+
+
# }}}
# class TextWrapper {{{
@@ -1749,6 +1874,7 @@
Subclass of textwrap.TextWrapper that backport the
drop_whitespace option.
"""
+
def __init__(self, *args, **kwargs):
drop_whitespace = kwargs.pop('drop_whitespace', True)
textwrap.TextWrapper.__init__(self, *args, **kwargs)
@@ -1823,6 +1949,8 @@
lines.append(indent + ''.join(cur_line))
return lines
+
+
# }}}
# function wrap() {{{
@@ -1835,4 +1963,5 @@
return TextWrapper(width=width, **kwargs).wrap(text)
return textwrap.wrap(text, width=width, **kwargs)
+
# }}}