i18n/hggettext
changeset 8542 de150a942ec8
child 8626 1fc1c77d4863
equal deleted inserted replaced
8541:06ace50443f6 8542:de150a942ec8
       
     1 #!/usr/bin/env python
       
     2 #
       
     3 # hggettext - carefully extract docstrings for Mercurial
       
     4 #
       
     5 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
       
     6 #
       
     7 # This software may be used and distributed according to the terms of the
       
     8 # GNU General Public License version 2, incorporated herein by reference.
       
     9 
       
    10 # The normalize function is taken from pygettext which is distributed
       
    11 # with Python under the Python License, which is GPL compatible.
       
    12 
       
    13 """Extract docstrings from Mercurial commands.
       
    14 
       
    15 Compared to pygettext, this script knows about the cmdtable and table
       
    16 dictionaries used by Mercurial, and will only extract docstrings from
       
    17 functions mentioned therein.
       
    18 
       
    19 Use xgettext like normal to extract strings marked as translatable and
       
     20 join the message catalogs to get the final catalog.
       
    21 """
       
    22 
       
    23 from mercurial import demandimport; demandimport.enable()
       
    24 import sys, inspect
       
    25 
       
    26 
       
    27 def escape(s):
       
    28     # The order is important, the backslash must be escaped first
       
    29     # since the other replacements introduce new backslashes
       
    30     # themselves.
       
    31     s = s.replace('\\', '\\\\')
       
    32     s = s.replace('\n', '\\n')
       
    33     s = s.replace('\r', '\\r')
       
    34     s = s.replace('\t', '\\t')
       
    35     s = s.replace('"', '\\"')
       
    36     return s
       
    37 
       
    38 
       
    39 def normalize(s):
       
    40     # This converts the various Python string types into a format that
       
    41     # is appropriate for .po files, namely much closer to C style.
       
    42     lines = s.split('\n')
       
    43     if len(lines) == 1:
       
    44         s = '"' + escape(s) + '"'
       
    45     else:
       
    46         if not lines[-1]:
       
    47             del lines[-1]
       
    48             lines[-1] = lines[-1] + '\n'
       
    49         lines = map(escape, lines)
       
    50         lineterm = '\\n"\n"'
       
    51         s = '""\n"' + lineterm.join(lines) + '"'
       
    52     return s
       
    53 
       
    54 
       
    55 def poentry(path, lineno, s):
       
    56     return ('#: %s:%d\n' % (path, lineno) +
       
    57             'msgid %s\n' % normalize(s) +
       
    58             'msgstr ""\n')
       
    59 
       
    60 
       
    61 def offset(src, doc, name, default):
       
    62     """Compute offset or issue a warning on stdout."""
       
    63     # Backslashes in doc appear doubled in src.
       
    64     end = src.find(doc.replace('\\', '\\\\'))
       
    65     if end == -1:
       
    66         # This can happen if the docstring contains unnecessary escape
       
    67         # sequences such as \" in a triple-quoted string. The problem
       
    68         # is that \" is turned into " and so doc wont appear in src.
       
    69         sys.stderr.write("warning: unknown offset in %s, assuming %d lines\n"
       
    70                          % (name, default))
       
    71         return default
       
    72     else:
       
    73         return src.count('\n', 0, end)
       
    74 
       
    75 
       
    76 def importpath(path):
       
    77     """Import a path like foo/bar/baz.py and return the baz module."""
       
    78     if path.endswith('.py'):
       
    79         path = path[:-3]
       
    80     if path.endswith('/__init__'):
       
    81         path = path[:-9]
       
    82     path = path.replace('/', '.')
       
    83     mod = __import__(path)
       
    84     for comp in path.split('.')[1:]:
       
    85         mod = getattr(mod, comp)
       
    86     return mod
       
    87 
       
    88 
       
    89 def docstrings(path):
       
    90     """Extract docstrings from path.
       
    91 
       
    92     This respects the Mercurial cmdtable/table convention and will
       
    93     only extract docstrings from functions mentioned in these tables.
       
    94     """
       
    95     mod = importpath(path)
       
    96     if mod.__doc__:
       
    97         src = open(path).read()
       
    98         lineno = 1 + offset(src, mod.__doc__, path, 7)
       
    99         print poentry(path, lineno, mod.__doc__)
       
   100 
       
   101     cmdtable = getattr(mod, 'cmdtable', {})
       
   102     if not cmdtable:
       
   103         # Maybe we are processing mercurial.commands?
       
   104         cmdtable = getattr(mod, 'table', {})
       
   105 
       
   106     for entry in cmdtable.itervalues():
       
   107         func = entry[0]
       
   108         if func.__doc__:
       
   109             src = inspect.getsource(func)
       
   110             name = "%s.%s" % (path, func.__name__)
       
   111             lineno = func.func_code.co_firstlineno
       
   112             lineno += offset(src, func.__doc__, name, 1)
       
   113             print poentry(path, lineno, func.__doc__)
       
   114 
       
   115 
       
   116 if __name__ == "__main__":
       
   117     for path in sys.argv[1:]:
       
   118         docstrings(path)