author | Mikael Berthe <mikael@lilotux.net> |
Thu, 03 May 2007 19:32:28 +0200 | |
changeset 33 | adf562e77977 |
parent 18 | 3a35dd9adc73 |
permissions | -rw-r--r-- |
0
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
1 |
import re, string, urllib |
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
2 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
3 |
# Patterns tried in order against the response HTML by translate();
# group 1 of the first one that matches is taken as the translated text.
__where = [
    re.compile(r'<span id="r_text" name="r_text">([^<]*)'),
]
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
4 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
5 |
class BabelizerError(Exception):
    """Base class of the translation errors defined in this module."""
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
7 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
8 |
class LanguageNotAvailableError(BabelizerError):
    """Raised by translate() when a requested language is not supported."""
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
10 |
class BabelfishChangedError(BabelizerError):
    """Raised by translate() when no known pattern matches the response."""
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
12 |
class BabelizerIOError(BabelizerError):
    """Raised by translate() when the request to the server fails."""
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
14 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
15 |
def clean(text):
    """Collapse every run of whitespace in *text* into a single space.

    Leading and trailing whitespace is dropped and newlines are treated
    like any other whitespace, so the result is always a single line.

    Args:
        text: the string to normalise.

    Returns:
        The whitespace-separated words of *text* joined by single spaces
        (an empty string when *text* holds no words).
    """
    # split() with no separator already ignores leading/trailing whitespace
    # and splits on newlines, so no strip()/replace() pre-pass is needed.
    return ' '.join(text.split())
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
17 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
18 |
def translate(phrase, from_lang, to_lang):
    """Translate *phrase* through the translate.ru web form.

    Only the first letter of each language is sent to the service, joined
    into a two-letter ``direction`` parameter.  Accepted initials are
    ``e``, ``s``, ``r``, ``i``, ``g`` and ``f``.  The code ``'es'`` is
    rewritten to ``'s'`` before the check (presumably so the ISO code for
    Spanish is not taken for English -- TODO confirm).

    Args:
        phrase: text to translate; it is encoded as CP1251 before sending.
        from_lang: source language name or code (case-insensitive).
        to_lang: target language name or code (case-insensitive).

    Returns:
        The text captured from the response page, decoded from CP1251 and
        passed through ``clean``.

    Raises:
        LanguageNotAvailableError: a language is empty or starts with an
            unsupported letter; the offending value is the exception
            argument.
        BabelizerIOError: opening the URL raised ``IOError``.
        BabelfishChangedError: no pattern in ``__where`` matched the
            response.

    Note:
        Uses ``urllib.urlencode`` and ``urllib.urlopen``, i.e. the
        Python 2 ``urllib`` module that this file imports.
    """
    supported = ('e', 's', 'r', 'i', 'g', 'f')

    from_lang = from_lang.lower()
    to_lang = to_lang.lower()
    if from_lang == 'es':
        from_lang = 's'
    if to_lang == 'es':
        to_lang = 's'

    for lang in (from_lang, to_lang):
        # Slice rather than index so that an empty language string is
        # rejected here instead of raising IndexError.
        if lang[:1] not in supported:
            raise LanguageNotAvailableError(lang)
    direction = from_lang[0] + to_lang[0]

    params = urllib.urlencode({
        'lang': 'en',
        'status': 'translate',
        'source': phrase.encode('cp1251'),
        'direction': direction,
        'template': 'General',
    })

    # Only IOError is translated; any other exception propagates unchanged
    # rather than being printed and then failing on an unbound `response`.
    try:
        response = urllib.urlopen('http://www.translate.ru/text.asp', params)
    except IOError as what:
        raise BabelizerIOError("Couldn't talk to server: %s" % what)

    try:
        html = response.read().decode('cp1251')
    finally:
        # Release the connection even if reading or decoding fails.
        response.close()

    for regex in __where:
        match = regex.search(html)
        if match:
            return clean(match.group(1))
    raise BabelfishChangedError("Can't recognize translated string.")
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
75 |
|
93b25987d3e5
Initial Mercurial repository
Mikael Berthe <mikael@lilotux.net>
parents:
diff
changeset
|
76 |
#print translate('hello') |