|
1 #!/usr/bin/env python |
|
2 # -*- coding: utf-8 -*- |
|
3 # |
|
4 # License: MIT (see LICENSE file provided) |
|
5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: |
|
6 |
|
7 """ |
|
8 **polib** allows you to manipulate, create, modify gettext files (pot, po |
|
9 and mo files). You can load existing files, iterate through its entries, |
|
10 add, modify entries, comments or metadata, etc... or create new po files |
|
11 from scratch. |
|
12 |
|
13 **polib** provides a simple and pythonic API, exporting only three |
|
14 convenience functions (*pofile*, *mofile* and *detect_encoding*), and the |
|
15 four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating |
|
16 new files/entries. |
|
17 |
|
18 **Basic example**: |
|
19 |
|
20 >>> import polib |
|
21 >>> # load an existing po file |
|
22 >>> po = polib.pofile('tests/test_utf8.po') |
|
23 >>> for entry in po: |
|
24 ... # do something with entry... |
|
25 ... pass |
|
26 >>> # add an entry |
|
27 >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue') |
|
28 >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')] |
|
29 >>> po.append(entry) |
|
30 >>> # to save our modified po file: |
|
31 >>> # po.save() |
|
32 >>> # or you may want to compile the po file |
|
33 >>> # po.save_as_mofile('tests/test_utf8.mo') |
|
34 """ |
|
35 |
|
36 __author__ = 'David JEAN LOUIS <izimobil@gmail.com>' |
|
37 __version__ = '0.5.2' |
|
# Public API of the module. Each name is listed exactly once
# ('detect_encoding' used to appear twice).
__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
           'detect_encoding', 'escape', 'unescape']
|
40 |
|
41 import codecs |
|
42 import struct |
|
43 import textwrap |
|
44 import types |
|
45 |
|
46 default_encoding = 'utf-8' |
|
47 |
|
48 # function pofile() {{{ |
|
49 |
|
def pofile(fpath, **kwargs):
    """
    Convenience function that parses the po/pot file *fpath* and returns
    a POFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po/pot file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if autodetect_encoding
        is set to False
      - *check_for_duplicates*: whether to check for duplicate entries when
        adding entries to the file, default: False (optional)

    **Example**:

    >>> import polib
    >>> po = polib.pofile('tests/test_weird_occurrences.po',
    ...     check_for_duplicates=True)
    >>> po #doctest: +ELLIPSIS
    <POFile instance at ...>
    >>> import os, tempfile
    >>> all_attrs = ('msgctxt', 'msgid', 'msgstr', 'msgid_plural',
    ...              'msgstr_plural', 'obsolete', 'comment', 'tcomment',
    ...              'occurrences', 'flags', 'previous_msgctxt',
    ...              'previous_msgid', 'previous_msgid_plural')
    >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
    ...     orig_po = polib.pofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_po.save(tmpf)
    ...     try:
    ...         new_po = polib.pofile(tmpf)
    ...         for old, new in zip(orig_po, new_po):
    ...             for attr in all_attrs:
    ...                 if getattr(old, attr) != getattr(new, attr):
    ...                     getattr(old, attr)
    ...                     getattr(new, attr)
    ...     finally:
    ...         os.unlink(tmpf)
    >>> po_file = polib.pofile('tests/test_save_as_mofile.po')
    >>> tmpf = tempfile.NamedTemporaryFile().name
    >>> po_file.save_as_mofile(tmpf)
    >>> try:
    ...     mo_file = polib.mofile(tmpf)
    ...     for old, new in zip(po_file, mo_file):
    ...         if po_file._encode(old.msgid) != mo_file._encode(new.msgid):
    ...             'OLD: ', po_file._encode(old.msgid)
    ...             'NEW: ', mo_file._encode(new.msgid)
    ...         if po_file._encode(old.msgstr) != mo_file._encode(new.msgstr):
    ...             'OLD: ', po_file._encode(old.msgstr)
    ...             'NEW: ', mo_file._encode(new.msgstr)
    ...             print(new.msgstr)
    ... finally:
    ...     os.unlink(tmpf)
    """
    # the flag is documented as a boolean, so plain truthiness is enough
    if kwargs.get('autodetect_encoding', True):
        enc = detect_encoding(fpath)
    else:
        enc = kwargs.get('encoding', default_encoding)
    check_for_duplicates = kwargs.get('check_for_duplicates', False)
    parser = _POFileParser(
        fpath,
        encoding=enc,
        check_for_duplicates=check_for_duplicates
    )
    instance = parser.parse()
    # the wrap width is applied to the parsed instance, not to the parser
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
|
120 |
|
121 # }}} |
|
122 # function mofile() {{{ |
|
123 |
|
def mofile(fpath, **kwargs):
    """
    Convenience function that parses the mo file *fpath* and returns
    a MOFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the mo file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext to generate the po file that was used to format
        the mo file (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the mo file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if autodetect_encoding
        is set to False
      - *check_for_duplicates*: whether to check for duplicate entries when
        adding entries to the file, default: False (optional)

    **Example**:

    >>> import polib
    >>> mo = polib.mofile('tests/test_utf8.mo', check_for_duplicates=True)
    >>> mo #doctest: +ELLIPSIS
    <MOFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
    ...     orig_mo = polib.mofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_mo.save(tmpf)
    ...     try:
    ...         new_mo = polib.mofile(tmpf)
    ...         for old, new in zip(orig_mo, new_mo):
    ...             if old.msgid != new.msgid:
    ...                 old.msgstr
    ...                 new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    # the flag is documented as a boolean, so plain truthiness is enough
    if kwargs.get('autodetect_encoding', True):
        # mo files are binary, hence the binary_mode flag
        enc = detect_encoding(fpath, True)
    else:
        enc = kwargs.get('encoding', default_encoding)
    check_for_duplicates = kwargs.get('check_for_duplicates', False)
    parser = _MOFileParser(
        fpath,
        encoding=enc,
        check_for_duplicates=check_for_duplicates
    )
    instance = parser.parse()
    # the wrap width is applied to the parsed instance, not to the parser
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
|
173 |
|
174 # }}} |
|
175 # function detect_encoding() {{{ |
|
176 |
|
def detect_encoding(fpath, binary_mode=False):
    """
    Try to detect the encoding used by the file *fpath*. The function will
    return polib default *encoding* if it's unable to detect it.

    The encoding is taken from the first line that holds a
    ``Content-Type: ...; charset=XXX`` declaration.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po or mo file to parse.
      - *binary_mode*: boolean, if True the file is opened in binary mode
        (optional, default to False).

    **Examples**:

    >>> print(detect_encoding('tests/test_noencoding.po'))
    utf-8
    >>> print(detect_encoding('tests/test_utf8.po'))
    UTF-8
    >>> print(detect_encoding('tests/test_utf8.mo', True))
    UTF-8
    >>> print(detect_encoding('tests/test_iso-8859-15.po'))
    ISO_8859-15
    >>> print(detect_encoding('tests/test_iso-8859-15.mo', True))
    ISO_8859-15
    """
    import re
    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
    if binary_mode:
        mode = 'rb'
    else:
        mode = 'r'
    f = open(fpath, mode)
    # try/finally guarantees that the handle is released on every exit path,
    # including an exception raised while reading or matching
    try:
        # iterate lazily and stop at the first match instead of loading the
        # whole file in memory with readlines()
        for line in f:
            match = rx.search(line)
            if match:
                return match.group(1).strip()
    finally:
        f.close()
    return default_encoding
|
212 |
|
213 # }}} |
|
214 # function escape() {{{ |
|
215 |
|
def escape(st):
    """
    Return the string *st* with its special characters (backslash, tab,
    carriage return, newline and double quote) escaped.

    **Examples**:

    >>> escape('\\t and \\n and \\r and " and \\\\')
    '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
    """
    # the backslash must come first, otherwise the backslashes introduced
    # by the following substitutions would be escaped a second time
    substitutions = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    for special, escaped in substitutions:
        st = st.replace(special, escaped)
    return st
|
230 |
|
231 # }}} |
|
232 # function unescape() {{{ |
|
233 |
|
def unescape(st):
    """
    Unescape special chars and return the given string *st*.

    The string is decoded in a single left to right pass, so an escaped
    backslash followed by a letter is never mistaken for an escape
    sequence, even when real escape sequences are present in the same
    string.

    **Examples**:

    >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
    '\\t and \\n and \\r and " and \\\\'
    >>> unescape(r'\\n')
    '\\n'
    >>> unescape(r'\\\\n')
    '\\\\n'
    >>> unescape('a\\\\\\\\n b\\\\n')
    'a\\\\n b\\n'
    """
    import re
    control_chars = {'n': '\n', 'r': '\r', 't': '\t'}

    def _replace(match):
        char = match.group(1)
        # an escaped backslash or double quote stands for itself
        return control_chars.get(char, char)

    return re.sub(r'\\(\\|n|t|r|")', _replace, st)
|
258 |
|
259 # }}} |
|
260 # class _BaseFile {{{ |
|
261 |
|
262 class _BaseFile(list): |
|
263 """ |
|
264 Common parent class for POFile and MOFile classes. |
|
265 This class must **not** be instanciated directly. |
|
266 """ |
|
267 |
|
268 def __init__(self, *args, **kwargs): |
|
269 """ |
|
270 Constructor. |
|
271 |
|
272 **Keyword arguments**: |
|
273 - *fpath*: string, path to po or mo file |
|
274 - *wrapwidth*: integer, the wrap width, only useful when -w option |
|
275 was passed to xgettext to generate the po file that was used to |
|
276 format the mo file, default to 78 (optional), |
|
277 - *encoding*: string, the encoding to use, defaults to |
|
278 "default_encoding" global variable (optional), |
|
279 - *check_for_duplicates*: whether to check for duplicate entries |
|
280 when adding entries to the file, default: False (optional). |
|
281 """ |
|
282 list.__init__(self) |
|
283 # the opened file handle |
|
284 self.fpath = kwargs.get('fpath') |
|
285 # the width at which lines should be wrapped |
|
286 self.wrapwidth = kwargs.get('wrapwidth', 78) |
|
287 # the file encoding |
|
288 self.encoding = kwargs.get('encoding', default_encoding) |
|
289 # whether to check for duplicate entries or not |
|
290 self.check_for_duplicates = kwargs.get('check_for_duplicates', False) |
|
291 # header |
|
292 self.header = '' |
|
293 # both po and mo files have metadata |
|
294 self.metadata = {} |
|
295 self.metadata_is_fuzzy = 0 |
|
296 |
|
297 def __str__(self): |
|
298 """ |
|
299 String representation of the file. |
|
300 """ |
|
301 ret = [] |
|
302 entries = [self.metadata_as_entry()] + \ |
|
303 [e for e in self if not e.obsolete] |
|
304 for entry in entries: |
|
305 ret.append(entry.__str__(self.wrapwidth)) |
|
306 for entry in self.obsolete_entries(): |
|
307 ret.append(entry.__str__(self.wrapwidth)) |
|
308 return '\n'.join(ret) |
|
309 |
|
310 def __contains__(self, entry): |
|
311 """ |
|
312 Overriden method to implement the membership test (in and not in). |
|
313 The method considers that an entry is in the file if it finds an |
|
314 entry that has the same msgid (case sensitive). |
|
315 |
|
316 **Keyword argument**: |
|
317 - *entry*: an instance of polib._BaseEntry |
|
318 |
|
319 **Tests**: |
|
320 >>> po = POFile() |
|
321 >>> e1 = POEntry(msgid='foobar', msgstr='spam') |
|
322 >>> e2 = POEntry(msgid='barfoo', msgstr='spam') |
|
323 >>> e3 = POEntry(msgid='foobar', msgstr='eggs') |
|
324 >>> e4 = POEntry(msgid='spameggs', msgstr='eggs') |
|
325 >>> po.append(e1) |
|
326 >>> po.append(e2) |
|
327 >>> e1 in po |
|
328 True |
|
329 >>> e2 not in po |
|
330 False |
|
331 >>> e3 in po |
|
332 True |
|
333 >>> e4 in po |
|
334 False |
|
335 """ |
|
336 return self.find(entry.msgid, by='msgid') is not None |
|
337 |
|
338 def append(self, entry): |
|
339 """ |
|
340 Overriden method to check for duplicates entries, if a user tries to |
|
341 add an entry that already exists, the method will raise a ValueError |
|
342 exception. |
|
343 |
|
344 **Keyword argument**: |
|
345 - *entry*: an instance of polib._BaseEntry |
|
346 |
|
347 **Tests**: |
|
348 >>> e1 = POEntry(msgid='foobar', msgstr='spam') |
|
349 >>> e2 = POEntry(msgid='foobar', msgstr='eggs') |
|
350 >>> po = POFile(check_for_duplicates=True) |
|
351 >>> po.append(e1) |
|
352 >>> try: |
|
353 ... po.append(e2) |
|
354 ... except ValueError, e: |
|
355 ... unicode(e) |
|
356 u'Entry "foobar" already exists' |
|
357 """ |
|
358 if self.check_for_duplicates and entry in self: |
|
359 raise ValueError('Entry "%s" already exists' % entry.msgid) |
|
360 super(_BaseFile, self).append(entry) |
|
361 |
|
362 def insert(self, index, entry): |
|
363 """ |
|
364 Overriden method to check for duplicates entries, if a user tries to |
|
365 insert an entry that already exists, the method will raise a ValueError |
|
366 exception. |
|
367 |
|
368 **Keyword arguments**: |
|
369 - *index*: index at which the entry should be inserted |
|
370 - *entry*: an instance of polib._BaseEntry |
|
371 |
|
372 **Tests**: |
|
373 >>> import polib |
|
374 >>> polib.check_for_duplicates = True |
|
375 >>> e1 = POEntry(msgid='foobar', msgstr='spam') |
|
376 >>> e2 = POEntry(msgid='barfoo', msgstr='eggs') |
|
377 >>> e3 = POEntry(msgid='foobar', msgstr='eggs') |
|
378 >>> po = POFile(check_for_duplicates=True) |
|
379 >>> po.insert(0, e1) |
|
380 >>> po.insert(1, e2) |
|
381 >>> try: |
|
382 ... po.insert(0, e3) |
|
383 ... except ValueError, e: |
|
384 ... unicode(e) |
|
385 u'Entry "foobar" already exists' |
|
386 """ |
|
387 if self.check_for_duplicates and entry in self: |
|
388 raise ValueError('Entry "%s" already exists' % entry.msgid) |
|
389 super(_BaseFile, self).insert(index, entry) |
|
390 |
|
391 def __repr__(self): |
|
392 """Return the official string representation of the object.""" |
|
393 return '<%s instance at %x>' % (self.__class__.__name__, id(self)) |
|
394 |
|
395 def metadata_as_entry(self): |
|
396 """ |
|
397 Return the metadata as an entry: |
|
398 |
|
399 >>> import polib |
|
400 >>> po = polib.pofile('tests/test_fuzzy_header.po') |
|
401 >>> unicode(po) == unicode(open('tests/test_fuzzy_header.po').read()) |
|
402 True |
|
403 """ |
|
404 e = POEntry(msgid='') |
|
405 mdata = self.ordered_metadata() |
|
406 if mdata: |
|
407 strs = [] |
|
408 e._multiline_str['msgstr'] = '' |
|
409 for name, value in mdata: |
|
410 # Strip whitespace off each line in a multi-line entry |
|
411 strs.append('%s: %s' % (name, value)) |
|
412 e.msgstr = '\n'.join(strs) + '\n' |
|
413 e._multiline_str['msgstr'] = '__POLIB__NL__'.join( |
|
414 [s + '\n' for s in strs]) |
|
415 if self.metadata_is_fuzzy: |
|
416 e.flags.append('fuzzy') |
|
417 return e |
|
418 |
|
419 def save(self, fpath=None, repr_method='__str__'): |
|
420 """ |
|
421 Save the po file to file *fpath* if no file handle exists for |
|
422 the object. If there's already an open file and no fpath is |
|
423 provided, then the existing file is rewritten with the modified |
|
424 data. |
|
425 |
|
426 **Keyword arguments**: |
|
427 - *fpath*: string, full or relative path to the file. |
|
428 - *repr_method*: string, the method to use for output. |
|
429 """ |
|
430 if self.fpath is None and fpath is None: |
|
431 raise IOError('You must provide a file path to save() method') |
|
432 contents = getattr(self, repr_method)() |
|
433 if fpath is None: |
|
434 fpath = self.fpath |
|
435 if repr_method == 'to_binary': |
|
436 fhandle = open(fpath, 'wb') |
|
437 else: |
|
438 fhandle = codecs.open(fpath, 'w', self.encoding) |
|
439 if type(contents) != types.UnicodeType: |
|
440 contents = contents.decode(self.encoding) |
|
441 fhandle.write(contents) |
|
442 fhandle.close() |
|
443 |
|
444 def find(self, st, by='msgid'): |
|
445 """ |
|
446 Find entry which msgid (or property identified by the *by* |
|
447 attribute) matches the string *st*. |
|
448 |
|
449 **Keyword arguments**: |
|
450 - *st*: string, the string to search for |
|
451 - *by*: string, the comparison attribute |
|
452 |
|
453 **Examples**: |
|
454 |
|
455 >>> po = pofile('tests/test_utf8.po') |
|
456 >>> entry = po.find('Thursday') |
|
457 >>> entry.msgstr |
|
458 u'Jueves' |
|
459 >>> entry = po.find('Some unexistant msgid') |
|
460 >>> entry is None |
|
461 True |
|
462 >>> entry = po.find('Jueves', 'msgstr') |
|
463 >>> entry.msgid |
|
464 u'Thursday' |
|
465 """ |
|
466 for e in self: |
|
467 if getattr(e, by) == st: |
|
468 return e |
|
469 return None |
|
470 |
|
471 def ordered_metadata(self): |
|
472 """ |
|
473 Convenience method that return the metadata ordered. The return |
|
474 value is list of tuples (metadata name, metadata_value). |
|
475 """ |
|
476 # copy the dict first |
|
477 metadata = self.metadata.copy() |
|
478 data_order = [ |
|
479 'Project-Id-Version', |
|
480 'Report-Msgid-Bugs-To', |
|
481 'POT-Creation-Date', |
|
482 'PO-Revision-Date', |
|
483 'Last-Translator', |
|
484 'Language-Team', |
|
485 'MIME-Version', |
|
486 'Content-Type', |
|
487 'Content-Transfer-Encoding' |
|
488 ] |
|
489 ordered_data = [] |
|
490 for data in data_order: |
|
491 try: |
|
492 value = metadata.pop(data) |
|
493 ordered_data.append((data, value)) |
|
494 except KeyError: |
|
495 pass |
|
496 # the rest of the metadata won't be ordered there are no specs for this |
|
497 keys = metadata.keys() |
|
498 list(keys).sort() |
|
499 for data in keys: |
|
500 value = metadata[data] |
|
501 ordered_data.append((data, value)) |
|
502 return ordered_data |
|
503 |
|
504 def to_binary(self): |
|
505 """ |
|
506 Return the mofile binary representation. |
|
507 """ |
|
508 import array |
|
509 import struct |
|
510 import types |
|
511 offsets = [] |
|
512 entries = self.translated_entries() |
|
513 # the keys are sorted in the .mo file |
|
514 def cmp(_self, other): |
|
515 if _self.msgid > other.msgid: |
|
516 return 1 |
|
517 elif _self.msgid < other.msgid: |
|
518 return -1 |
|
519 else: |
|
520 return 0 |
|
521 # add metadata entry |
|
522 entries.sort(cmp) |
|
523 mentry = self.metadata_as_entry() |
|
524 mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip() |
|
525 entries = [mentry] + entries |
|
526 entries_len = len(entries) |
|
527 ids, strs = '', '' |
|
528 for e in entries: |
|
529 # For each string, we need size and file offset. Each string is |
|
530 # NUL terminated; the NUL does not count into the size. |
|
531 if e.msgid_plural: |
|
532 indexes = e.msgstr_plural.keys() |
|
533 indexes.sort() |
|
534 msgstr = [] |
|
535 for index in indexes: |
|
536 msgstr.append(e.msgstr_plural[index]) |
|
537 msgid = self._encode(e.msgid + '\0' + e.msgid_plural) |
|
538 msgstr = self._encode('\0'.join(msgstr)) |
|
539 else: |
|
540 msgid = self._encode(e.msgid) |
|
541 msgstr = self._encode(e.msgstr) |
|
542 offsets.append((len(ids), len(msgid), len(strs), len(msgstr))) |
|
543 ids += msgid + '\0' |
|
544 strs += msgstr + '\0' |
|
545 # The header is 7 32-bit unsigned integers. |
|
546 keystart = 7*4+16*entries_len |
|
547 # and the values start after the keys |
|
548 valuestart = keystart + len(ids) |
|
549 koffsets = [] |
|
550 voffsets = [] |
|
551 # The string table first has the list of keys, then the list of values. |
|
552 # Each entry has first the size of the string, then the file offset. |
|
553 for o1, l1, o2, l2 in offsets: |
|
554 koffsets += [l1, o1+keystart] |
|
555 voffsets += [l2, o2+valuestart] |
|
556 offsets = koffsets + voffsets |
|
557 output = struct.pack("IIIIIII", |
|
558 0x950412de, # Magic number |
|
559 0, # Version |
|
560 entries_len, # # of entries |
|
561 7*4, # start of key index |
|
562 7*4+entries_len*8, # start of value index |
|
563 0, 0) # size and offset of hash table |
|
564 output += array.array("I", offsets).tostring() |
|
565 output += ids |
|
566 output += strs |
|
567 return output |
|
568 |
|
569 def _encode(self, mixed): |
|
570 """ |
|
571 Encode the given argument with the file encoding if the type is unicode |
|
572 and return the encoded string. |
|
573 """ |
|
574 if type(mixed) == types.UnicodeType: |
|
575 return mixed.encode(self.encoding) |
|
576 return mixed |
|
577 |
|
578 # }}} |
|
579 # class POFile {{{ |
|
580 |
|
class POFile(_BaseFile):
    '''
    Po (or Pot) file reader/writer.
    POFile objects inherit the list objects methods.

    **Example**:

    >>> po = POFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
    >>> entry1.comment = "Some useful comment"
    >>> entry2 = POEntry(
    ...     msgid="Peace in some languages",
    ...     msgstr="Pace سلام שלום Hasîtî 和平"
    ... )
    >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
    >>> entry2.comment = "Another useful comment"
    >>> entry3 = POEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> entry3.comment = "Test string quoting"
    >>> po.append(entry1)
    >>> po.append(entry2)
    >>> po.append(entry3)
    >>> po.header = "Some Header"
    >>> print(po)
    # Some Header
    msgid ""
    msgstr ""
    <BLANKLINE>
    #. Some useful comment
    #: testfile:12 another_file:1
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    #. Another useful comment
    #: testfile:15 another_file:5
    msgid "Peace in some languages"
    msgstr "Pace سلام שלום Hasîtî 和平"
    <BLANKLINE>
    #. Test string quoting
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __str__(self):
        """
        Return the string representation of the po file: the header lines
        written as comments, followed by the entries.
        """
        ret, headers = '', self.header.split('\n')
        for header in headers:
            # header lines starting with a comma or a colon are written
            # right after the "#", the other ones after "# "
            if header[:1] in [',', ':']:
                ret += '#%s\n' % header
            else:
                ret += '# %s\n' % header
        return ret + _BaseFile.__str__(self)

    def save_as_mofile(self, fpath):
        """
        Save the binary representation of the file to *fpath*.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method that return the percentage of translated
        messages (an integer, rounded down). Obsolete entries are not
        counted in the total; a file without entries is considered to be
        fully translated.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> po.percent_translated()
        50
        >>> po = POFile()
        >>> po.percent_translated()
        100
        """
        total = len([e for e in self if not e.obsolete])
        if total == 0:
            return 100
        translated = len(self.translated_entries())
        # multiply before dividing: the product is exact, so an exact
        # percentage cannot be truncated to the integer below because of the
        # rounding error of an intermediate 100.0 / total quotient
        return int(translated * 100 / float(total))

    def translated_entries(self):
        """
        Convenience method that return a list of translated entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.translated_entries())
        6
        """
        return [e for e in self if e.translated()]

    def untranslated_entries(self):
        """
        Convenience method that return a list of untranslated entries
        (obsolete and fuzzy entries are left out).

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.untranslated_entries())
        4
        """
        return [e for e in self if not e.translated() and not e.obsolete
                and 'fuzzy' not in e.flags]

    def fuzzy_entries(self):
        """
        Convenience method that return the list of 'fuzzy' entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.fuzzy_entries())
        2
        """
        return [e for e in self if 'fuzzy' in e.flags]

    def obsolete_entries(self):
        """
        Convenience method that return the list of obsolete entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.obsolete_entries())
        4
        """
        return [e for e in self if e.obsolete]

    def merge(self, refpot):
        """
        XXX this could not work if encodings are different, needs thinking
        and general refactoring of how polib handles encoding...

        Convenience method that merge the current pofile with the pot file
        provided. It behaves exactly as the gettext msgmerge utility:

          - comments of this file will be preserved, but extracted comments
            and occurrences will be discarded
          - any translations or comments in the file will be discarded,
            however dot comments and file positions will be preserved

        **Keyword argument**:
          - *refpot*: object POFile, the reference catalog.

        **Example**:

        >>> import polib
        >>> refpot = polib.pofile('tests/test_merge.pot')
        >>> po = polib.pofile('tests/test_merge_before.po')
        >>> po.merge(refpot)
        >>> expected_po = polib.pofile('tests/test_merge_after.po')
        >>> unicode(po) == unicode(expected_po)
        True
        """
        for entry in refpot:
            e = self.find(entry.msgid)
            if e is None:
                # the entry is new: add an empty one that is filled in by the
                # entry level merge below
                e = POEntry()
                self.append(e)
            e.merge(entry)
        # ok, now we must "obsolete" entries that are not in the refpot
        # anymore
        for entry in self:
            if refpot.find(entry.msgid) is None:
                entry.obsolete = True
|
761 |
|
762 # }}} |
|
763 # class MOFile {{{ |
|
764 |
|
class MOFile(_BaseFile):
    '''
    Mo file reader/writer.
    MOFile objects inherit the list objects methods.

    **Example**:

    >>> mo = MOFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry2 = POEntry(
    ...     msgid="I need my dirty cheese",
    ...     msgstr="Je veux mon sale fromage"
    ... )
    >>> entry3 = MOEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> mo.append(entry1)
    >>> mo.append(entry2)
    >>> mo.append(entry3)
    >>> print(mo)
    msgid ""
    msgstr ""
    <BLANKLINE>
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    msgid "I need my dirty cheese"
    msgstr "Je veux mon sale fromage"
    <BLANKLINE>
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __init__(self, *args, **kwargs):
        """
        MOFile constructor. Mo files have two other properties:
          - magic_number: the magic_number of the binary file,
          - version: the version of the mo spec.
        """
        _BaseFile.__init__(self, *args, **kwargs)
        self.magic_number = None
        self.version = 0

    def save_as_pofile(self, fpath):
        """
        Save the string representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath)

    def save(self, fpath=None):
        """
        Save the binary representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file (optional,
            when omitted the path the object was created with is used, as
            for the parent class save() method).
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always return 100.
        """
        return 100

    def translated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Return the file itself: all its entries are considered translated.
        """
        return self

    def untranslated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always return an empty list.
        """
        return []

    def fuzzy_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always return an empty list.
        """
        return []

    def obsolete_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always return an empty list.
        """
        return []
|
860 |
|
861 # }}} |
|
862 # class _BaseEntry {{{ |
|
863 |
|
864 class _BaseEntry(object): |
|
865 """ |
|
866 Base class for POEntry or MOEntry objects. |
|
867 This class must *not* be instanciated directly. |
|
868 """ |
|
869 |
|
870 def __init__(self, *args, **kwargs): |
|
871 """Base Entry constructor.""" |
|
872 self.msgid = kwargs.get('msgid', '') |
|
873 self.msgstr = kwargs.get('msgstr', '') |
|
874 self.msgid_plural = kwargs.get('msgid_plural', '') |
|
875 self.msgstr_plural = kwargs.get('msgstr_plural', {}) |
|
876 self.obsolete = kwargs.get('obsolete', False) |
|
877 self.encoding = kwargs.get('encoding', default_encoding) |
|
878 self.msgctxt = kwargs.get('msgctxt', None) |
|
879 self._multiline_str = {} |
|
880 |
|
881 def __repr__(self): |
|
882 """Return the official string representation of the object.""" |
|
883 return '<%s instance at %x>' % (self.__class__.__name__, id(self)) |
|
884 |
|
885 def __str__(self, wrapwidth=78): |
|
886 """ |
|
887 Common string representation of the POEntry and MOEntry |
|
888 objects. |
|
889 """ |
|
890 if self.obsolete: |
|
891 delflag = '#~ ' |
|
892 else: |
|
893 delflag = '' |
|
894 ret = [] |
|
895 # write the msgctxt if any |
|
896 if self.msgctxt is not None: |
|
897 ret += self._str_field("msgctxt", delflag, "", self.msgctxt) |
|
898 # write the msgid |
|
899 ret += self._str_field("msgid", delflag, "", self.msgid) |
|
900 # write the msgid_plural if any |
|
901 if self.msgid_plural: |
|
902 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural) |
|
903 if self.msgstr_plural: |
|
904 # write the msgstr_plural if any |
|
905 msgstrs = self.msgstr_plural |
|
906 keys = list(msgstrs) |
|
907 keys.sort() |
|
908 for index in keys: |
|
909 msgstr = msgstrs[index] |
|
910 plural_index = '[%s]' % index |
|
911 ret += self._str_field("msgstr", delflag, plural_index, msgstr) |
|
912 else: |
|
913 # otherwise write the msgstr |
|
914 ret += self._str_field("msgstr", delflag, "", self.msgstr) |
|
915 ret.append('') |
|
916 return '\n'.join(ret) |
|
917 |
|
918 def _str_field(self, fieldname, delflag, plural_index, field): |
|
919 if (fieldname + plural_index) in self._multiline_str: |
|
920 field = self._multiline_str[fieldname + plural_index] |
|
921 lines = [''] + field.split('__POLIB__NL__') |
|
922 else: |
|
923 lines = field.splitlines(True) |
|
924 if len(lines) > 1: |
|
925 lines = ['']+lines # start with initial empty line |
|
926 else: |
|
927 lines = [field] # needed for the empty string case |
|
928 if fieldname.startswith('previous_'): |
|
929 # quick and dirty trick to get the real field name |
|
930 fieldname = fieldname[9:] |
|
931 |
|
932 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index, |
|
933 escape(lines.pop(0)))] |
|
934 for mstr in lines: |
|
935 ret.append('%s"%s"' % (delflag, escape(mstr))) |
|
936 return ret |
|
937 |
|
938 # }}} |
|
939 # class POEntry {{{ |
|
940 |
|
class POEntry(_BaseEntry):
    """
    Represents a po file entry.

    **Examples**:

    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
    >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
    >>> print(entry)
    #: welcome.py:12 anotherfile.py:34
    msgid "Welcome"
    msgstr "Bienvenue"
    <BLANKLINE>
    >>> entry = POEntry()
    >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
    >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
    >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
    >>> entry.flags.append('c-format')
    >>> entry.previous_msgctxt = '@somecontext'
    >>> entry.previous_msgid = 'I had eggs but no spam !'
    >>> entry.previous_msgid_plural = 'I had eggs and %d spam !'
    >>> entry.msgctxt = '@somenewcontext'
    >>> entry.msgid = 'I have spam but no egg !'
    >>> entry.msgid_plural = 'I have spam and %d eggs !'
    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
    >>> print(entry)
    #. A plural translation. This is a very very very long line please do not
    #. wrap, this is just for testing comment wrapping...
    # A plural translation. This is a very very very long line please do not wrap,
    # this is just for testing comment wrapping...
    #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
    #: src/eggs.c:45
    #, c-format
    #| msgctxt "@somecontext"
    #| msgid "I had eggs but no spam !"
    #| msgid_plural "I had eggs and %d spam !"
    msgctxt "@somenewcontext"
    msgid "I have spam but no egg !"
    msgid_plural "I have spam and %d eggs !"
    msgstr[0] "J'ai du jambon mais aucun oeuf !"
    msgstr[1] "J'ai du jambon et %d oeufs !"
    <BLANKLINE>
    """

    def __init__(self, *args, **kwargs):
        """
        POEntry constructor.

        All arguments are forwarded to ``_BaseEntry.__init__``; the keyword
        arguments below are additionally stored on the entry.

        **Keyword arguments**:
          - *comment*: string, comment written with the '#. ' prefix
            (default: '')
          - *tcomment*: string, comment written with the '# ' prefix
            (default: '')
          - *occurrences*: list of (path, line number) pairs (default: [])
          - *flags*: list of strings written on the '#, ' line (default: [])
          - *previous_msgctxt*: string or None, written as '#| msgctxt'
          - *previous_msgid*: string or None, written as '#| msgid'
          - *previous_msgid_plural*: string or None, written as
            '#| msgid_plural'
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        self.comment = kwargs.get('comment', '')
        self.tcomment = kwargs.get('tcomment', '')
        self.occurrences = kwargs.get('occurrences', [])
        self.flags = kwargs.get('flags', [])
        self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
        self.previous_msgid = kwargs.get('previous_msgid', None)
        self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry.

        **Keyword arguments**:
          - *wrapwidth*: integer, maximum width used when wrapping comment
            and occurrence lines; a value <= 0 disables wrapping.

        Obsolete entries are rendered by ``_BaseEntry.__str__`` alone, i.e.
        without comments, occurrences, flags or previous fields.
        """
        if self.obsolete:
            return _BaseEntry.__str__(self, wrapwidth)

        def comment_lines(text, marker):
            # One output line per input line; a line that does not fit in
            # wrapwidth once the marker is added is wrapped on whitespace.
            lines = []
            for line in text.split('\n'):
                if wrapwidth > 0 and len(line) > wrapwidth - len(marker):
                    lines += textwrap.wrap(line, wrapwidth,
                                           initial_indent=marker,
                                           subsequent_indent=marker,
                                           break_long_words=False)
                else:
                    lines.append('%s%s' % (marker, line))
            return lines

        ret = []
        # comment first, if any (with text wrapping as xgettext does)
        if self.comment != '':
            ret += comment_lines(self.comment, '#. ')
        # translator comment, if any (with text wrapping as xgettext does)
        if self.tcomment != '':
            ret += comment_lines(self.tcomment, '# ')
        # occurrences (with text wrapping as xgettext does)
        if self.occurrences:
            filelist = []
            for fpath, lineno in self.occurrences:
                if lineno:
                    filelist.append('%s:%s' % (fpath, lineno))
                else:
                    # no (or falsy) line number: write the path alone
                    filelist.append(fpath)
            filestr = ' '.join(filelist)
            if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
                # XXX textwrap splits words that contain a hyphen, which is
                # not what we want for filenames, so hyphens are temporarily
                # replaced with "*" while wrapping.
                # NOTE: a literal "*" in a path therefore comes back as "-".
                lines = textwrap.wrap(filestr.replace('-', '*'),
                                      wrapwidth,
                                      initial_indent='#: ',
                                      subsequent_indent='#: ',
                                      break_long_words=False)
                # end of the replace hack
                for line in lines:
                    ret.append(line.replace('*', '-'))
            else:
                ret.append('#: ' + filestr)
        # flags
        if self.flags:
            ret.append('#, %s' % ', '.join(self.flags))

        # previous context and previous msgid/msgid_plural
        if self.previous_msgctxt:
            ret += self._str_field("previous_msgctxt", "#| ", "",
                                   self.previous_msgctxt)
        if self.previous_msgid:
            ret += self._str_field("previous_msgid", "#| ", "",
                                   self.previous_msgid)
        if self.previous_msgid_plural:
            ret += self._str_field("previous_msgid_plural", "#| ", "",
                                   self.previous_msgid_plural)

        ret.append(_BaseEntry.__str__(self, wrapwidth))
        return '\n'.join(ret)

    def __cmp__(self, other):
        '''
        Called by comparison operations if rich comparison is not defined.

        Entries are ordered by, in this order: obsolete entries first, then
        their sorted occurrences (path, then line number), then their msgid.
        Returns -1, 0 or 1; 0 is returned only when all three are equal.

        **Tests**:
        >>> a  = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> b  = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
        >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
        >>> po = POFile()
        >>> po.append(a)
        >>> po.append(b)
        >>> po.append(c1)
        >>> po.append(c2)
        >>> po.sort()
        >>> print(po)
        #
        msgid ""
        msgstr ""
        <BLANKLINE>
        #: a.py:1 a.py:3
        msgid "c2"
        msgstr ""
        <BLANKLINE>
        #: a.py:1 b.py:1
        msgid "c1"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "a"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "b"
        msgstr ""
        <BLANKLINE>
        '''
        # First: obsolete test
        if self.obsolete != other.obsolete:
            if self.obsolete:
                return -1
            return 1
        # Second: occurrences. sorted() works on copies, so the entries'
        # own lists keep their order; sequences compare item by item and a
        # strict prefix sorts before the longer list.
        occ1 = sorted(self.occurrences)
        occ2 = sorted(other.occurrences)
        if occ1 != occ2:
            if occ1 > occ2:
                return 1
            return -1
        # Finally: compare message IDs
        if self.msgid > other.msgid:
            return 1
        if self.msgid < other.msgid:
            return -1
        return 0

    def translated(self):
        """
        Return True if the entry has been translated or False.

        Obsolete and fuzzy entries are never considered translated. A
        plural entry is translated only when none of its plural forms is
        an empty string.
        """
        if self.obsolete or 'fuzzy' in self.flags:
            return False
        if self.msgstr != '':
            return True
        if self.msgstr_plural:
            for pos in self.msgstr_plural:
                if self.msgstr_plural[pos] == '':
                    return False
            return True
        return False

    def merge(self, other):
        """
        Merge the current entry with the given pot entry.

        msgid, occurrences, comment, flags and msgid_plural are taken from
        *other*. Existing plural translations are kept, and an empty
        translation is added for every plural index that only *other* has.
        """
        self.msgid = other.msgid
        # copy the lists so that later changes to this entry do not leak
        # into the pot entry (and vice versa)
        self.occurrences = list(other.occurrences)
        self.comment = other.comment
        self.flags = list(other.flags)
        self.msgid_plural = other.msgid_plural
        if other.msgstr_plural:
            for pos in other.msgstr_plural:
                # keep existing translation at pos if any
                if pos not in self.msgstr_plural:
                    self.msgstr_plural[pos] = ''
|
1181 |
|
1182 # }}} |
|
1183 # class MOEntry {{{ |
|
1184 |
|
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.

    **Examples**:

    >>> entry = MOEntry()
    >>> entry.msgid = 'translate me !'
    >>> entry.msgstr = 'traduisez moi !'
    >>> print(entry)
    msgid "translate me !"
    msgstr "traduisez moi !"
    <BLANKLINE>
    """

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry.

        The work is delegated unchanged to ``_BaseEntry.__str__``.

        **Keyword arguments**:
          - *wrapwidth*: integer, forwarded as is to the base class.
        """
        rendered = _BaseEntry.__str__(self, wrapwidth)
        return rendered
|
1205 |
|
1206 # }}} |
|
1207 # class _POFileParser {{{ |
|
1208 |
|
class _POFileParser(object):
    """
    A finite state machine to parse efficiently and correctly po
    file format.
    """

    # Line prefixes and the symbol they map to, tested in this order (the
    # first match wins). A line that is exactly '#' is also a 'TC' symbol.
    _PREFIX_SYMBOLS = (
        ('#:', 'OC'),
        ('msgctxt "', 'CT'),
        ('msgid "', 'MI'),
        ('msgstr "', 'MS'),
        ('"', 'MC'),
        ('#| "', 'MC'),
        ('msgid_plural "', 'MP'),
        ('msgstr[', 'MX'),
        ('#, ', 'FL'),
        ('# ', 'TC'),
        ('#.', 'GC'),
        ('#| msgid_plural', 'PP'),
        ('#| msgid', 'PM'),
        ('#| msgctxt', 'PC'),
    )

    # Entry attribute extended by a continuation line, keyed by the state
    # the machine is in when the continuation is met ('MX' is handled
    # separately because it targets one item of msgstr_plural).
    _CONTINUED_FIELDS = {
        'CT': 'msgctxt',
        'MI': 'msgid',
        'MP': 'msgid_plural',
        'MS': 'msgstr',
        'PP': 'previous_msgid_plural',
        'PM': 'previous_msgid',
        'PC': 'previous_msgctxt',
    }

    def __init__(self, fpath, *args, **kwargs):
        """
        Constructor.

        **Arguments**:
          - *fpath*: string, path to the po file
          - *encoding*: string, the encoding to use, defaults to
            "default_encoding" global variable (optional); an encoding name
            unknown to the codecs module also falls back to that default,
          - *check_for_duplicates*: whether to check for duplicate entries
            when adding entries to the file, default: False (optional).
        """
        enc = kwargs.get('encoding', default_encoding)
        check_dup = kwargs.get('check_for_duplicates', False)
        try:
            self.fhandle = codecs.open(fpath, 'rU', enc)
        except LookupError:
            # unknown encoding name: retry with the module default
            enc = default_encoding
            self.fhandle = codecs.open(fpath, 'rU', enc)
        self.instance = POFile(
            fpath=fpath,
            encoding=enc,
            check_for_duplicates=check_dup
        )
        self.transitions = {}
        self.current_entry = POEntry()
        self.current_state = 'ST'
        self.current_token = None
        # two memo flags used in handlers
        self.msgstr_index = 0
        self.entry_obsolete = 0
        # Configure the state machine, by adding transitions.
        # Meaning of the symbols:
        #     * ST: Beginning of the file (start)
        #     * HE: Header
        #     * TC: a translation comment
        #     * GC: a generated comment
        #     * OC: a file/line occurrence
        #     * FL: a flags line
        #     * CT: a message context
        #     * PC: a previous msgctxt
        #     * PM: a previous msgid
        #     * PP: a previous msgid_plural
        #     * MI: a msgid
        #     * MP: a msgid plural
        #     * MS: a msgstr
        #     * MX: a msgstr plural
        #     * MC: a msgid or msgstr continuation line
        all_states = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP',
                      'TC', 'MS', 'MP', 'MX', 'MI']

        self.add('TC', ['ST', 'HE'], 'HE')
        self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
                        'MP', 'MX', 'MI'], 'TC')
        self.add('GC', all_states, 'GC')
        self.add('OC', all_states, 'OC')
        self.add('FL', all_states, 'FL')
        self.add('PC', all_states, 'PC')
        self.add('PM', all_states, 'PM')
        self.add('PP', all_states, 'PP')
        self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
                        'PP', 'MS', 'MX'], 'CT')
        self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
                        'PM', 'PP', 'MS', 'MX'], 'MI')
        self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
        self.add('MS', ['MI', 'MP', 'TC'], 'MS')
        self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
        self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')

    def parse(self):
        """
        Run the state machine, parse the file line by line and call process()
        with the current matched symbol.

        Returns the POFile instance built by the constructor, filled with
        the parsed entries. Raises IOError (from process()) on a line that
        is not valid in the current state. The file handle is closed in
        every case, including when an error is raised.
        """
        try:
            linenum = 0
            for line in self.fhandle:
                linenum += 1
                line = line.strip()
                if line == '':
                    continue
                if line[:3] == '#~ ':
                    # obsolete line: drop the marker, remember the flag
                    line = line[3:]
                    self.entry_obsolete = 1
                else:
                    self.entry_obsolete = 0
                self.current_token = line
                symbol = self._symbol_for(line)
                # lines that match no known prefix are silently ignored
                if symbol is not None:
                    self.process(symbol, linenum)

            if self.current_entry:
                # since entries are added when another entry is found, we
                # must add the last entry here (only if there are lines)
                self.instance.append(self.current_entry)
            # before returning the instance, check if there's metadata and
            # if so extract it in a dict
            firstentry = self.instance[0]
            if firstentry.msgid == '':  # metadata found
                # remove the entry
                firstentry = self.instance.pop(0)
                self.instance.metadata_is_fuzzy = firstentry.flags
                key = None
                for msg in firstentry.msgstr.splitlines():
                    if ':' in msg:
                        key, val = msg.split(':', 1)
                        self.instance.metadata[key] = val.strip()
                    elif key is not None:
                        # no "key:" part: continuation of the previous value
                        self.instance.metadata[key] += '\n' + msg.strip()
            return self.instance
        finally:
            # close opened file, also when a syntax error was raised
            self.fhandle.close()

    def _symbol_for(self, line):
        """
        Return the two chars symbol matching *line*, or None if the line
        starts with none of the known prefixes.
        """
        if line == '#':
            # an empty translator comment line
            return 'TC'
        for prefix, symbol in self._PREFIX_SYMBOLS:
            if line.startswith(prefix):
                return symbol
        return None

    def add(self, symbol, states, next_state):
        """
        Add a transition to the state machine.
        Keywords arguments:

        symbol     -- string, the matched token (two chars symbol)
        states     -- list, a list of states (two chars symbols)
        next_state -- the next state the fsm will have after the action;
                      the action is the "handle_<next_state>" method
        """
        action = getattr(self, 'handle_%s' % next_state.lower())
        for state in states:
            self.transitions[(symbol, state)] = (action, next_state)

    def process(self, symbol, linenum):
        """
        Process the transition corresponding to the current state and the
        symbol provided.

        Keywords arguments:
        symbol  -- string, the matched token (two chars symbol)
        linenum -- integer, the current line number of the parsed file

        Raises IOError when no transition exists for (symbol, current
        state) or when the handler itself fails.
        """
        try:
            (action, state) = self.transitions[(symbol, self.current_state)]
            # a handler returns a false value to keep the current state
            if action():
                self.current_state = state
        except Exception:
            raise IOError('Syntax error in po file (line %s)' % linenum)

    # state handlers

    def _flush_entry(self):
        """
        Store the current entry and start a new one when the previous line
        ended an entry (i.e. the machine is in a msgstr state).
        """
        if self.current_state in ['MC', 'MS', 'MX']:
            self.instance.append(self.current_entry)
            self.current_entry = POEntry()

    def handle_he(self):
        """Handle a header comment."""
        if self.instance.header != '':
            self.instance.header += '\n'
        # drop the leading "# "
        self.instance.header += self.current_token[2:]
        return True

    def handle_tc(self):
        """Handle a translator comment."""
        self._flush_entry()
        if self.current_entry.tcomment != '':
            self.current_entry.tcomment += '\n'
        # drop the leading "# "
        self.current_entry.tcomment += self.current_token[2:]
        return True

    def handle_gc(self):
        """Handle a generated comment."""
        self._flush_entry()
        if self.current_entry.comment != '':
            self.current_entry.comment += '\n'
        # drop the leading "#. "
        self.current_entry.comment += self.current_token[3:]
        return True

    def handle_oc(self):
        """
        Handle a file:num occurrence.

        Each whitespace separated item is split on its last colon. When the
        part after it is not a number (or there is no colon at all), the
        whole item is kept as the path, with an empty line number.
        """
        self._flush_entry()
        for occurrence in self.current_token[3:].split():
            try:
                # split on the LAST colon so that paths which themselves
                # contain a colon (e.g. a drive letter) are kept whole
                fil, line = occurrence.rsplit(':', 1)
            except ValueError:
                # no colon at all
                fil, line = occurrence, ''
            else:
                if not line.isdigit():
                    fil, line = occurrence, ''
            self.current_entry.occurrences.append((fil, line))
        return True

    def handle_fl(self):
        """Handle a flags line."""
        self._flush_entry()
        self.current_entry.flags += self.current_token[3:].split(', ')
        return True

    def handle_pp(self):
        """Handle a previous msgid_plural line."""
        self._flush_entry()
        # 17 == len('#| msgid_plural "')
        self.current_entry.previous_msgid_plural = \
            unescape(self.current_token[17:-1])
        return True

    def handle_pm(self):
        """Handle a previous msgid line."""
        self._flush_entry()
        # 10 == len('#| msgid "')
        self.current_entry.previous_msgid = \
            unescape(self.current_token[10:-1])
        return True

    def handle_pc(self):
        """Handle a previous msgctxt line."""
        self._flush_entry()
        # 12 == len('#| msgctxt "')
        self.current_entry.previous_msgctxt = \
            unescape(self.current_token[12:-1])
        return True

    def handle_ct(self):
        """Handle a msgctxt."""
        self._flush_entry()
        self.current_entry.msgctxt = unescape(self.current_token[9:-1])
        return True

    def handle_mi(self):
        """Handle a msgid."""
        self._flush_entry()
        self.current_entry.obsolete = self.entry_obsolete
        self.current_entry.msgid = unescape(self.current_token[7:-1])
        return True

    def handle_mp(self):
        """Handle a msgid plural."""
        self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
        return True

    def handle_ms(self):
        """Handle a msgstr."""
        self.current_entry.msgstr = unescape(self.current_token[8:-1])
        return True

    def handle_mx(self):
        """Handle a msgstr plural."""
        # the token looks like: msgstr[N] "value", with a one char index N
        index, value = self.current_token[7], self.current_token[11:-1]
        self.current_entry.msgstr_plural[index] = unescape(value)
        self.msgstr_index = index
        return True

    def handle_mc(self):
        """
        Handle a msgid or msgstr continuation line.

        The text is appended to the field being read (given by the current
        state) and recorded in the entry's ``_multiline_str`` dict, chunks
        being separated by the "__POLIB__NL__" marker.
        """
        token = unescape(self.current_token[1:-1])
        entry = self.current_entry
        if self.current_state == 'MX':
            typ = 'msgstr[%s]' % self.msgstr_index
            entry.msgstr_plural[self.msgstr_index] += token
        else:
            typ = self._CONTINUED_FIELDS[self.current_state]
            if typ.startswith('previous_'):
                # the line was '#| "..."': drop what is left of the '#| "'
                # prefix after the slice above
                token = token[3:]
            setattr(entry, typ, getattr(entry, typ) + token)
        if typ not in entry._multiline_str:
            entry._multiline_str[typ] = token
        else:
            entry._multiline_str[typ] += "__POLIB__NL__" + token
        # don't change the current state
        return False
|
1546 |
|
1547 # }}} |
|
1548 # class _MOFileParser {{{ |
|
1549 |
|
class _MOFileParser(object):
    """
    A class to parse binary mo files.
    """
    # Magic number values as obtained when the first four bytes of the file
    # are unpacked as a little-endian unsigned int (see parse()).
    BIG_ENDIAN = 0xde120495
    LITTLE_ENDIAN = 0x950412de

    def __init__(self, fpath, *args, **kwargs):
        """
        Constructor.

        **Arguments**:
          - *fpath*: string, path to the mo file
          - *encoding*: string, the encoding to use, defaults to
            "default_encoding" global variable (optional),
          - *check_for_duplicates*: whether to check for duplicate entries
            when adding entries to the file, default: False (optional).
        """
        enc = kwargs.get('encoding', default_encoding)
        check_dup = kwargs.get('check_for_duplicates', False)
        self.fhandle = open(fpath, 'rb')
        self.instance = MOFile(
            fpath=fpath,
            encoding=enc,
            check_for_duplicates=check_dup
        )

    def parse_magicnumber(self):
        """
        Parse the magic number and raise an exception if not valid.

        NOTE(review): this method has no body and does nothing; the magic
        number is actually read and checked by parse().
        """

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        Returns the MOFile instance filled with the entries read. Raises
        IOError when the magic number is not a known one. The file handle
        is closed in every case, including when an error is raised.
        """
        try:
            magic_number = self._readbinary('<I', 4)
            if magic_number == self.LITTLE_ENDIAN:
                ii = '<II'
            elif magic_number == self.BIG_ENDIAN:
                ii = '>II'
            else:
                raise IOError('Invalid mo file, magic number is incorrect !')
            self.instance.magic_number = magic_number
            # parse the version number and the number of strings
            self.instance.version, numofstrings = self._readbinary(ii, 8)
            # original strings and translation strings hash table offset
            msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
            # move to msgid hash table and read length and offset of msgids
            self.fhandle.seek(msgids_hash_offset)
            msgids_index = [self._readbinary(ii, 8)
                            for _ in range(numofstrings)]
            # move to msgstr hash table and read length and offset of msgstrs
            self.fhandle.seek(msgstrs_hash_offset)
            msgstrs_index = [self._readbinary(ii, 8)
                             for _ in range(numofstrings)]
            # build entries
            for i in range(numofstrings):
                msgid_len, msgid_offset = msgids_index[i]
                self.fhandle.seek(msgid_offset)
                msgid = self.fhandle.read(msgid_len)
                msgstr_len, msgstr_offset = msgstrs_index[i]
                self.fhandle.seek(msgstr_offset)
                msgstr = self.fhandle.read(msgstr_len)
                if i == 0:  # metadata
                    # NOTE(review): the first string is always taken as the
                    # metadata entry, whatever its msgid is.
                    metadata = {}
                    for line in msgstr.split('\n'):
                        tokens = line.split(':', 1)
                        if tokens[0] != '':
                            if len(tokens) > 1:
                                metadata[tokens[0]] = tokens[1].strip()
                            else:
                                # a key without any "key: value" separator
                                metadata[tokens[0]] = ''
                    self.instance.metadata = metadata
                    continue
                # test if we have a plural entry: singular and plural msgids
                # are then separated by a NUL char, as are the translations
                msgid_tokens = msgid.split('\0')
                if len(msgid_tokens) > 1:
                    entry = MOEntry(
                        msgid=msgid_tokens[0],
                        msgid_plural=msgid_tokens[1],
                        msgstr_plural=dict(enumerate(msgstr.split('\0')))
                    )
                else:
                    entry = MOEntry(msgid=msgid, msgstr=msgstr)
                self.instance.append(entry)
            return self.instance
        finally:
            # close opened file, also when the file turned out to be invalid
            self.fhandle.close()

    def _readbinary(self, fmt, numbytes):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.

        Raises struct.error when fewer bytes than <fmt> requires could be
        read.
        """
        data = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, data)
        if len(tup) == 1:
            return tup[0]
        return tup
|
1652 |
|
1653 # }}} |
|
1654 # __main__ {{{ |
|
1655 |
|
if __name__ == '__main__':
    # **Main function**::
    #   - to **test** the module just run: *python polib.py [-v]*
    #   - to **profile** the module: *python polib.py -p <some_pofile.po>*
    import sys
    if len(sys.argv) > 2 and sys.argv[1] == '-p':
        try:
            text_type = unicode
        except NameError:
            # no "unicode" builtin on this interpreter: str is the text type
            text_type = str

        def test(f):
            # parse the file, then render it back to text
            if f.endswith('po'):
                p = pofile(f)
            else:
                p = mofile(f)
            text_type(p)

        import profile
        # %r yields a valid string literal, so a path containing quotes or
        # backslashes cannot break (or inject code into) the statement
        profile.run('test(%r)' % (sys.argv[2],))
    else:
        import doctest
        doctest.testmod()
|
1675 |
|
1676 # }}} |