|
1 # babelizer.py - API for simple access to babelfish.altavista.com. |
|
2 # Requires python 2.0 or better. |
|
3 # |
|
4 # See it in use at http://babel.MrFeinberg.com/ |
|
5 |
|
6 """API for simple access to babelfish.altavista.com. |
|
7 |
|
8 Summary: |
|
9 |
|
10 import sys, babelizer |
|
11 |
|
12 print ' '.join(babelizer.available_languages) |
|
13 |
|
14 print babelizer.translate( 'How much is that doggie in the window?', |
|
15 'English', 'French' ) |
|
16 |
|
17 def babel_callback(phrase): |
|
18 print phrase |
|
19 sys.stdout.flush() |
|
20 |
|
21 babelizer.babelize( 'I love a reigning knight.', |
|
22 'English', 'German', |
|
23 callback = babel_callback ) |
|
24 |
|
25 available_languages |
|
26 A list of languages available for use with babelfish. |
|
27 |
|
28 translate( phrase, from_lang, to_lang ) |
|
29 Uses babelfish to translate phrase from from_lang to to_lang. |
|
30 |
|
31 babelize(phrase, from_lang, through_lang, limit = 12, callback = None) |
|
32 Uses babelfish to translate back and forth between from_lang and |
|
33 through_lang until either no more changes occur in translation or |
|
34 limit iterations have been reached, whichever comes first. Takes |
|
35 an optional callback function which should receive a single |
|
36 parameter, being the next translation. Without the callback |
|
37 returns a list of successive translations. |
|
38 |
|
39 It's only guaranteed to work if 'english' is one of the two languages |
|
40 given to either of the translation methods. |
|
41 |
|
42 Both translation methods throw exceptions which are all subclasses of |
|
43 BabelizerError. They include |
|
44 |
|
45 LanguageNotAvailableError |
|
46 Thrown on an attempt to use an unknown language. |
|
47 |
|
48 BabelfishChangedError |
|
49 Thrown when babelfish.altavista.com changes some detail of their |
|
50 layout, and babelizer can no longer parse the results or submit |
|
51 the correct form (a not infrequent occurrence). |
|
52 |
|
53 BabelizerIOError |
|
54 Thrown for various networking and IO errors. |
|
55 |
|
56 Version: $Id: babelizer.py,v 1.1.1.1 2005/09/29 21:38:49 mikem Exp $ |
|
57 Author: Jonathan Feinberg <jdf@pobox.com> |
|
58 """ |
|
59 import re, string, urllib |
|
60 |
|
# Regular expressions for the different page layouts in which the babelfish
# result has been found.  Group 1 of each pattern captures the translated
# text.  They are listed most-frequently-seen first and are meant to be tried
# in that order; no better heuristic for choosing between them is known
# (the original author offered $1.00 for one, AltaVista employees included).
__where = list(map(re.compile, (
    r'name=\"q\">([^<]*)',
    r'td bgcolor=white>([^<]*)',
    r'<\/strong><br>([^<]*)',
    r'<Div style=padding:10px;[^>]*>([^<]*)',
)))
|
72 |
|
# Every language identifier accepted by this module, in lower case, mapped to
# the two-letter code it stands for.  Full names come first; each two-letter
# code is also accepted directly and simply maps to itself.
__languages = dict([
    ('english', 'en'),
    ('french', 'fr'),
    ('spanish', 'es'),
    ('german', 'de'),
    ('italian', 'it'),
    ('portuguese', 'pt'),
    ('russian', 'ru'),
    ('korean', 'ko'),
    ('chinese', 'zh'),
    ('japanese', 'ja'),
    ('en', 'en'),
    ('fr', 'fr'),
    ('es', 'es'),
    ('de', 'de'),
    ('it', 'it'),
    ('pt', 'pt'),
    ('ru', 'ru'),
    ('ko', 'ko'),
    ('zh', 'zh'),
    ('ja', 'ja'),
])

# Title-cased form of every identifier above (full names and codes alike).
available_languages = [x.title() for x in __languages]
|
99 |
|
class BabelizerError(Exception):
    """Base class of every error that translate() or babelize() can raise."""


class LanguageNotAvailableError(BabelizerError):
    """Raised on an attempt to use an unknown language."""


class BabelfishChangedError(BabelizerError):
    """Raised when the translated string cannot be recognized in the reply."""


class BabelizerIOError(BabelizerError):
    """Raised when the babelfish server cannot be talked to."""
|
112 |
|
def clean(text):
    """Return text with whitespace normalized.

    Leading and trailing whitespace is removed and every internal run of
    whitespace (spaces, tabs, newlines) is collapsed to a single space.
    """
    # split() with no argument already splits on any whitespace and drops
    # empty leading/trailing fields, so no separate strip()/replace() step
    # (and no deprecated string-module function) is needed.
    return ' '.join(text.split())
|
115 |
|
def translate(phrase, from_lang, to_lang):
    """Translate phrase from from_lang to to_lang using babelfish.

    phrase    -- the text to translate; its whitespace is normalized first.
    from_lang -- language name or two-letter code (case-insensitive).
    to_lang   -- language name or two-letter code (case-insensitive).

    Returns the translated text with its whitespace normalized.

    Raises:
      LanguageNotAvailableError -- either language is not a known identifier.
      BabelizerIOError          -- the request or reading the reply failed
                                   with an IOError.
      BabelfishChangedError     -- none of the known result patterns matched
                                   the returned page.

    Any other exception is allowed to propagate to the caller instead of
    being printed and swallowed.
    """
    phrase = clean(phrase)
    try:
        from_code = __languages[from_lang.lower()]
        to_code = __languages[to_lang.lower()]
    except KeyError as lang:
        raise LanguageNotAvailableError(lang)

    params = urllib.urlencode({
        'doit': 'done',
        'tt': 'urltext',
        'intl': '1',
        'urltext': phrase.encode('utf-8', 'replace'),
        'lp': from_code + '_' + to_code,
    })

    try:
        response = urllib.urlopen('http://babelfish.altavista.com/babelfish/tr', params)
        try:
            # Reading can fail just like connecting, so it is covered by the
            # same IOError handler below.
            html = response.read()
        finally:
            # Always release the connection, even if the read failed.
            response.close()
    except IOError as what:
        raise BabelizerIOError("Couldn't talk to server: %s" % what)

    # Try each known page layout in turn; the first one that matches wins.
    match = None
    for regex in __where:
        match = regex.search(html)
        if match:
            break
    if not match:
        raise BabelfishChangedError("Can't recognize translated string.")
    return clean(match.group(1))
|
142 |
|
def babelize(phrase, from_language, through_language, limit = 12, callback = None):
    """Translate phrase back and forth between two languages.

    Starting from from_language, the phrase is translated to
    through_language, then back, and so on.  The process stops when a
    translation repeats one already seen or after limit translations,
    whichever comes first.

    phrase           -- the starting text; its whitespace is normalized first.
    from_language    -- language of the starting phrase.
    through_language -- language to bounce the phrase through.
    limit            -- maximum number of translations to perform.
    callback         -- optional function taking one argument.  If given, it
                        is called with the starting phrase and then with each
                        new translation as it is produced.

    Returns the list of successive phrases (starting phrase first) when no
    callback is given; returns None when a callback is given.

    Errors raised by translate() propagate to the caller.
    """
    phrase = clean(phrase)
    seen = {phrase: True}  # used as a set of phrases already produced
    results = [phrase]
    if callback:
        callback(phrase)

    # Maps each language to the one to translate into next.
    flip = {from_language: through_language, through_language: from_language}
    source = from_language
    for _ in range(limit):
        phrase = translate(phrase, source, flip[source])
        if phrase in seen:
            # The translation has reached a phrase seen before; stop.
            break
        seen[phrase] = True
        if callback:
            callback(phrase)
        else:
            results.append(phrase)
        source = flip[source]

    if not callback:
        return results
|
162 |
|
if __name__ == '__main__':
    # Small demonstration: bounce a sentence between English and French,
    # printing each intermediate phrase as soon as it is available.
    import sys

    def show_phrase(x):
        print(x)
        # Flush so each phrase appears immediately rather than when the
        # output buffer fills.
        sys.stdout.flush()

    babelize(
        "I won't take that sort of treatment from you, or from your doggie!",
        'english',
        'french',
        callback=show_phrase,
    )