author | Mikael Berthe <mikael@lilotux.net> |
Sun, 20 May 2007 09:46:49 +0200 | |
changeset 39 | 2123977057c5 |
parent 29 | 602b355c5e89 |
permissions | -rw-r--r-- |
0
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
1 |
# babelizer.py - API for simple access to babelfish.altavista.com. |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
2 |
# Requires python 2.0 or better. |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
3 |
# |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
4 |
# See it in use at http://babel.MrFeinberg.com/ |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
5 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
6 |
"""API for simple access to babelfish.altavista.com. |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
7 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
8 |
Summary: |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
9 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
10 |
import babelizer |
29
602b355c5e89
Babel: Try to fix UTF issues
Mikael Berthe <mikael@lilotux.net>
parents:
18
diff
changeset
|
11 |
|
0
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
12 |
print ' '.join(babelizer.available_languages) |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
13 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
14 |
print babelizer.translate( 'How much is that doggie in the window?', |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
15 |
'English', 'French' ) |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
16 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
17 |
def babel_callback(phrase): |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
18 |
print phrase |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
19 |
sys.stdout.flush() |
29
602b355c5e89
Babel: Try to fix UTF issues
Mikael Berthe <mikael@lilotux.net>
parents:
18
diff
changeset
|
20 |
|
0
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
21 |
babelizer.babelize( 'I love a reigning knight.', |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
22 |
'English', 'German', |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
23 |
callback = babel_callback ) |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
24 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
25 |
available_languages |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
26 |
A list of languages available for use with babelfish. |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
27 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
28 |
translate( phrase, from_lang, to_lang ) |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
29 |
Uses babelfish to translate phrase from from_lang to to_lang. |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
30 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
31 |
babelize(phrase, from_lang, through_lang, limit = 12, callback = None) |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
32 |
Uses babelfish to translate back and forth between from_lang and |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
33 |
through_lang until either no more changes occur in translation or |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
34 |
limit iterations have been reached, whichever comes first. Takes |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
35 |
an optional callback function which should receive a single |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
36 |
parameter, being the next translation. Without the callback |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
37 |
returns a list of successive translations. |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
38 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
39 |
It's only guaranteed to work if 'english' is one of the two languages |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
40 |
given to either of the translation methods. |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
41 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
42 |
Both translation methods throw exceptions which are all subclasses of |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
43 |
BabelizerError. They include |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
44 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
45 |
LanguageNotAvailableError |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
46 |
Thrown on an attempt to use an unknown language. |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
47 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
48 |
BabelfishChangedError |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
49 |
Thrown when babelfish.altavista.com changes some detail of their |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
50 |
layout, and babelizer can no longer parse the results or submit |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
51 |
the correct form (a not infrequent occurrence). |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
52 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
53 |
BabelizerIOError |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
54 |
Thrown for various networking and IO errors. |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
55 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
56 |
Version: $Id: babelizer.py,v 1.1.1.1 2005/09/29 21:38:49 mikem Exp $ |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
57 |
Author: Jonathan Feinberg <jdf@pobox.com> |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
58 |
""" |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
59 |
import re, string, urllib |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
60 |
|
29
602b355c5e89
Babel: Try to fix UTF issues
Mikael Berthe <mikael@lilotux.net>
parents:
18
diff
changeset
|
61 |
def unicode_urlencode(params):
    """URL-encode params, sending unicode text values as UTF-8 bytes.

    params is either a dict or an iterable of (key, value) pairs.  Every
    value that is a unicode text object is encoded to UTF-8 first; all
    other values are handed to urlencode unchanged.

    Returns the encoded query string.
    """
    # Resolve the interpreter-specific names once, so the same code runs
    # on Python 2 (unicode, urllib.urlencode) and Python 3 (str,
    # urllib.parse.urlencode).
    try:
        text_type = unicode
    except NameError:
        text_type = str
    try:
        urlencode = urllib.urlencode
    except AttributeError:
        from urllib.parse import urlencode

    if isinstance(params, dict):
        params = params.items()

    pairs = []
    for key, value in params:
        # Explicit branch instead of "cond and a or b", which silently
        # picks the wrong operand whenever the encoded value is falsy.
        if isinstance(value, text_type):
            value = value.encode('utf-8')
        pairs.append((key, value))
    return urlencode(pairs)
602b355c5e89
Babel: Try to fix UTF issues
Mikael Berthe <mikael@lilotux.net>
parents:
18
diff
changeset
|
66 |
|
602b355c5e89
Babel: Try to fix UTF issues
Mikael Berthe <mikael@lilotux.net>
parents:
18
diff
changeset
|
67 |
|
0
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
68 |
""" |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
69 |
Various patterns I have encountered in looking for the babelfish result. |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
70 |
We try each of them in turn, based on the relative number of times I've |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
71 |
seen each of these patterns. $1.00 to anyone who can provide a heuristic |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
72 |
for knowing which one to use. This includes AltaVista employees. |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
73 |
""" |
29
602b355c5e89
Babel: Try to fix UTF issues
Mikael Berthe <mikael@lilotux.net>
parents:
18
diff
changeset
|
74 |
# Compiled result patterns, tried in this order by translate(); group 1
# of the first pattern that matches is taken as the translated text.
__where = list(map(re.compile, (
    r'name=\"q\">([^<]*)',
    r'td bgcolor=white>([^<]*)',
    r'<\/strong><br>([^<]*)',
    r'<[Dd]iv style=padding:10px;[^>]*>([^<]*)',
)))
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
79 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
80 |
def _language_table():
    """Return the mapping from accepted language keys to babelfish codes.

    Both the lower-case English language name and the two-letter code
    itself are accepted as keys; each maps to the two-letter code.
    """
    names_to_codes = [
        ('english', 'en'),
        ('french', 'fr'),
        ('spanish', 'es'),
        ('german', 'de'),
        ('italian', 'it'),
        ('portuguese', 'pt'),
        ('russian', 'ru'),
        ('korean', 'ko'),
        ('chinese', 'zh'),
        ('japanese', 'ja'),
    ]
    table = {}
    # Full names first, then the codes mapped onto themselves.
    for name, code in names_to_codes:
        table[name] = code
    for _, code in names_to_codes:
        table[code] = code
    return table


__languages = _language_table()

# All of the available language names (title-cased keys of the table,
# which includes the two-letter codes).
available_languages = [key.title() for key in __languages]
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
106 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
107 |
""" |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
108 |
Calling translate() or babelize() can raise a BabelizerError |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
109 |
""" |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
110 |
class BabelizerError(Exception):
    """Base class of the errors raised by translate() and babelize()."""


class LanguageNotAvailableError(BabelizerError):
    """Raised when a requested language is not in the language table."""


class BabelfishChangedError(BabelizerError):
    """Raised when no known pattern matches the page returned by the server."""


class BabelizerIOError(BabelizerError):
    """Raised when talking to the translation server fails with an IOError."""
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
119 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
120 |
def clean(text):
    """Return text with every run of whitespace collapsed to one space.

    Leading and trailing whitespace is removed and newlines count as
    ordinary whitespace.
    """
    # split() with no argument already discards leading/trailing
    # whitespace and treats newlines as separators, so no separate
    # strip() or newline replacement is required.
    return ' '.join(text.split())
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
122 |
|
29
602b355c5e89
Babel: Try to fix UTF issues
Mikael Berthe <mikael@lilotux.net>
parents:
18
diff
changeset
|
123 |
def translate(phrase, from_lang, to_lang, utf=1):
    """Translate phrase from from_lang to to_lang using babelfish.

    phrase is whitespace-normalised with clean() before it is sent.
    from_lang and to_lang are looked up case-insensitively in the
    module's language table.  When utf is true, a phrase that is not
    already a byte string is encoded to UTF-8 before the request is
    built.

    Returns the cleaned text captured by the first result pattern that
    matches the page sent back by the server.

    Raises LanguageNotAvailableError for an unknown language,
    BabelizerIOError when the request or reading the reply fails with an
    IOError, and BabelfishChangedError when no pattern matches the reply.
    """
    phrase = clean(phrase)
    try:
        from_code = __languages[from_lang.lower()]
        to_code = __languages[to_lang.lower()]
    except KeyError as lang:
        raise LanguageNotAvailableError(lang)

    if utf and not isinstance(phrase, str):
        # encode() returns a new object, so the result must be kept.
        # Byte strings are left alone: re-encoding them would first
        # decode them as ASCII and fail on any non-ASCII byte.
        phrase = phrase.encode('utf-8', 'replace')

    params = unicode_urlencode({'doit': 'done',
                                'tt': 'urltext',
                                'intl': '1',
                                'urltext': phrase,
                                'lp': from_code + '_' + to_code})
    try:
        babel = urllib.FancyURLopener()
        babel.addheader('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
        response = babel.open('http://babelfish.altavista.com/tr', params)
        try:
            html = response.read()
        finally:
            # Always release the connection, even if reading fails.
            response.close()
    except IOError as what:
        raise BabelizerIOError("Couldn't talk to server: %s" % what)
    # Any other exception propagates unchanged: swallowing it here would
    # leave no response to parse and only hide the real cause.

    match = None
    for regex in __where:
        match = regex.search(html)
        if match:
            break
    if not match:
        raise BabelfishChangedError("Can't recognize translated string.")
    return clean(match.group(1))
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
154 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
155 |
def babelize(phrase, from_language, through_language, limit = 12, callback = None):
    """Translate phrase back and forth until it stops changing.

    Starting from the cleaned phrase, alternately translates from
    from_language to through_language and back again.  The loop ends
    when a translation has already been seen or after limit
    translations, whichever comes first.

    If callback is given it is called with the cleaned phrase and then
    with every new translation, and None is returned.  Otherwise the
    list of successive phrases (starting with the cleaned input) is
    returned.

    Errors raised by translate() are not caught here.
    """
    phrase = clean(phrase)
    # No explicit encoding here: unicode_urlencode() encodes the phrase
    # when the request is built, which is also why translate() is
    # called with its utf step disabled below.
    seen = set([phrase])

    if callback:
        results = None
        emit = callback
    else:
        results = []
        emit = results.append
    emit(phrase)

    # Maps each language to the other one so the direction can alternate.
    flip = { from_language: through_language, through_language: from_language }
    source = from_language
    for _ in range(limit):
        phrase = translate(phrase, source, flip[source], None)
        if phrase in seen:
            break
        seen.add(phrase)
        emit(phrase)
        source = flip[source]
    return results
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
175 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
176 |
if __name__ == '__main__':
    import sys

    def printer(x):
        """Print one translation step and flush so it appears at once."""
        print(x)
        sys.stdout.flush()

    # Demo run: bounce a sentence between English and French.
    babelize("I won't take that sort of treatment from you, or from your doggie!",
             'english', 'french', callback = printer)