--- a/modules/babelizer.py Wed May 02 22:38:53 2007 +0200
+++ b/modules/babelizer.py Wed May 02 23:53:08 2007 +0200
@@ -8,7 +8,7 @@
Summary:
import babelizer
-
+
print ' '.join(babelizer.available_languages)
print babelizer.translate( 'How much is that doggie in the window?',
@@ -17,7 +17,7 @@
def babel_callback(phrase):
print phrase
sys.stdout.flush()
-
+
babelizer.babelize( 'I love a reigning knight.',
'English', 'German',
callback = babel_callback )
@@ -58,16 +58,23 @@
"""
import re, string, urllib
+def unicode_urlencode(params):
+    """
+    URL-encode params, first converting unicode keys and values to UTF-8.
+
+    params may be a dict or a sequence of (key, value) pairs.  Items that
+    are not unicode objects are handed to urllib.urlencode unchanged.
+    Returns the encoded query string.
+    """
+    def to_utf8(item):
+        # urllib.urlencode applies str() to every key and value, which
+        # fails for non-ASCII unicode; hand it UTF-8 byte strings instead.
+        if isinstance(item, unicode):
+            return item.encode('utf-8')
+        return item
+
+    if isinstance(params, dict):
+        params = params.items()
+    return urllib.urlencode([(to_utf8(k), to_utf8(v)) for k, v in params])
+
+
"""
Various patterns I have encountered in looking for the babelfish result.
We try each of them in turn, based on the relative number of times I've
seen each of these patterns. $1.00 to anyone who can provide a heuristic
for knowing which one to use. This includes AltaVista employees.
"""
-__where = [ re.compile(r'name=\"q\">([^<]*)'),
+__where = [ re.compile(r'name=\"q\">([^<]*)'),
re.compile(r'td bgcolor=white>([^<]*)'),
re.compile(r'<\/strong><br>([^<]*)'),
- re.compile(r'<Div style=padding:10px;[^>]*>([^<]*)')
+ re.compile(r'<[Dd]iv style=padding:10px;[^>]*>([^<]*)')
]
__languages = { 'english' : 'en',
@@ -113,7 +120,7 @@
def clean(text):
return ' '.join(string.replace(text.strip(), "\n", ' ').split())
-def translate(phrase, from_lang, to_lang):
+def translate(phrase, from_lang, to_lang, utf=1):
phrase = clean(phrase)
try:
from_code = __languages[from_lang.lower()]
@@ -121,13 +128,18 @@
except KeyError, lang:
raise LanguageNotAvailableError(lang)
- params = urllib.urlencode( { 'doit' : 'done',
+    if utf:
+        # encode() returns a new string; rebind it, otherwise the utf
+        # flag has no effect at all.
+        phrase = phrase.encode('utf-8', 'replace')
+
+ params = unicode_urlencode( { 'doit' : 'done',
'tt' : 'urltext',
'intl' : '1',
- 'urltext' : phrase.encode('utf-8', 'replace'),
+ 'urltext' : phrase,
'lp' : from_code + '_' + to_code } )
try:
- response = urllib.urlopen('http://babelfish.altavista.com/babelfish/tr', params)
+ babel = urllib.FancyURLopener()
+ babel.addheader('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
+ response = babel.open('http://babelfish.altavista.com/tr', params)
except IOError, what:
raise BabelizerIOError("Couldn't talk to server: %s" % what)
except:
@@ -142,6 +154,7 @@
def babelize(phrase, from_language, through_language, limit = 12, callback = None):
phrase = clean(phrase)
+    # Encode once up front and keep the result: every phrase recorded in
+    # `seen` and passed to the callback is then a UTF-8 byte string, and
+    # translate() is called below with utf=None so it does not re-encode.
+    phrase = phrase.encode('utf-8', 'replace')
seen = { phrase: 1 }
if callback:
callback(phrase)
@@ -150,7 +163,7 @@
flip = { from_language: through_language, through_language: from_language }
next = from_language
for i in range(limit):
- phrase = translate(phrase, next, flip[next])
+ phrase = translate(phrase, next, flip[next], None)
if seen.has_key(phrase): break
seen[phrase] = 1
if callback:
--- a/plugins/babel_plugin.py Wed May 02 22:38:53 2007 +0200
+++ b/plugins/babel_plugin.py Wed May 02 23:53:08 2007 +0200
@@ -58,7 +58,7 @@
results = babelizer.babelize(body, from_lang, through_lang)
reply = ''
for result in results:
- reply += '\n' + result
+ reply += '\n' + result.decode('utf-8', 'replace')
except babelizer.LanguageNotAvailableError:
reply = 'Invalid Language'
except babelizer.BabelfishChangedError: