plugins/rss_plugin.py
changeset 0 93b25987d3e5
child 17 069f7fd5545d
equal deleted inserted replaced
-1:000000000000 0:93b25987d3e5
       
     1 #$ neutron_plugin 01
       
     2 
       
     3 from xml.sax import make_parser, handler
       
     4 
       
     5 RSS_CACHE_FILE = 'dynamic/RSS_CACHE.txt'
       
     6 RSS_INTERVAL = 30
       
     7 RSS_QUERY_DELAY = 10
       
     8 RSS_ITEM_DELAY = 120
       
     9 
       
    10 RSS_CACHE = {}
       
    11 last_query = 0
       
    12 UNSENT_HEADLINES = []
       
    13 RSS_IS_ENABLED = 0
       
    14 
       
    15 initialize_file(RSS_CACHE_FILE, "{'channels': {}}")
       
    16 
       
    17 ################################################################################
       
    18 
       
    19 import re
       
    20 def rss_remove_html(text):
       
    21 	exp = re.compile('<[^>]*>')
       
    22 	text = exp.sub('', text)
       
    23 	notags = text.replace('&lt;', '<').replace('&gt;', '>')
       
    24 	noescape = notags.replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>').replace('&quot;', '"')
       
    25 	noescape = noescape.replace('&lt;p&gt;', '')
       
    26 	noescape = noescape.replace('&lt;/p&gt;', '')
       
    27 	noescape = noescape.replace('&lt;p /&gt;', '').replace('&lt;p/&gt;', '')
       
    28 	return noescape
       
    29 
       
    30 """ OLD CODE: REMOVE LATER IF NEW FUNCTION (added 2005-10-12) WORKS
       
    31 def rss_remove_html(text):
       
    32 	notags = text.replace('&lt;', '<').replace('&gt;', '>')
       
    33 	noescape = notags.replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>').replace('&quot;', '"')
       
    34 	noescape = noescape.replace('&lt;p&gt;', '')
       
    35 	noescape = noescape.replace('&lt;/p&gt;', '')
       
    36 	noescape = noescape.replace('&lt;p /&gt;', '').replace('&lt;p/&gt;', '')
       
    37 	return noescape
       
    38 """
       
    39 
       
    40 def rss_update_file():
       
    41 	global RSS_CACHE
       
    42 	write_file(RSS_CACHE_FILE, str(RSS_CACHE))
       
    43 
       
    44 def rss_read_file():
       
    45 	global RSS_CACHE
       
    46 	RSS_CACHE = eval(read_file(RSS_CACHE_FILE))
       
    47 
       
    48 def rss_add_channel(name, url):
       
    49 	global RSS_CACHE
       
    50 	if not RSS_CACHE['channels'].has_key(name):
       
    51 		RSS_CACHE['channels'][name] = {'url': url, 'lastitem': '', 'subscribers': [], 'title': name, 'link': '', 'description': name}
       
    52 		rss_update_file()
       
    53 	else:
       
    54 		RSS_CACHE['channels'][name]['url'] = url
       
    55 
       
    56 def rss_remove_channel(name):
       
    57 	global RSS_CACHE
       
    58 	if RSS_CACHE['channels'].has_key(name):
       
    59 		del RSS_CACHE['channels'][name]
       
    60 		rss_update_file()
       
    61 
       
    62 def rss_subscribe(name, jid):
       
    63 	global RSS_CACHE
       
    64 	if RSS_CACHE['channels'].has_key(name):
       
    65 		if not jid in RSS_CACHE['channels'][name]['subscribers']:
       
    66 			RSS_CACHE['channels'][name]['subscribers'].append(jid)
       
    67 			rss_update_file()
       
    68 
       
    69 def rss_unsubscribe(name, jid):
       
    70 	global RSS_CACHE
       
    71 	if RSS_CACHE['channels'].has_key(name):
       
    72 		if jid in RSS_CACHE['channels'][name]['subscribers']:
       
    73 			RSS_CACHE['channels'][name]['subscribers'].remove(jid)
       
    74 			rss_update_file()
       
    75 
       
    76 def rss_query_channels_loop():
       
    77 	RSS_IS_ENABLED = 1
       
    78 	while RSS_IS_ENABLED:
       
    79 		rss_query_channels()
       
    80 		time.sleep(RSS_ITEM_DELAY)
       
    81 		if len(UNSENT_HEADLINES):
       
    82 			random.shuffle(UNSENT_HEADLINES)
       
    83 			(channel, item) = UNSENT_HEADLINES.pop()
       
    84 			rss_dispatch_headline(channel, item)
       
    85 
       
    86 def rss_end_loop():
       
    87 	RSS_IS_ENABLED = 0
       
    88 
       
    89 def rss_query_channels():
       
    90 	global RSS_CACHE
       
    91 	global last_query
       
    92 	if time.time() > last_query + (RSS_INTERVAL * 60):
       
    93 		print 'Querying Channels'
       
    94 		last_query = time.time()
       
    95 		for channel in RSS_CACHE['channels']:
       
    96 			rss_query_channel(channel)
       
    97 			time.sleep(RSS_QUERY_DELAY)
       
    98 		print 'Finished Querying Headlines'
       
    99 
       
   100 def rss_query_channel(channel):
       
   101 	print 'Querying: "' + channel + '"'
       
   102 	parser = make_parser()
       
   103 	parser.setContentHandler(RSSHandler(channel))
       
   104 	try:
       
   105 		parser.parse(RSS_CACHE['channels'][channel]['url'])
       
   106 	except:
       
   107 		#raise
       
   108 		print 'error parsing: ' + channel
       
   109 
       
   110 def rss_dispatch_headlines(channel, info, items):
       
   111 	global RSS_CACHE
       
   112 	RSS_CACHE['channels'][channel]['title'] = info['title']
       
   113 	RSS_CACHE['channels'][channel]['link'] = info['link']
       
   114 	RSS_CACHE['channels'][channel]['description'] = info['description']
       
   115 	for item in items:
       
   116 		if item == RSS_CACHE['channels'][channel]['lastitem']:	
       
   117 			break
       
   118 		else:
       
   119 			UNSENT_HEADLINES.append((channel, item))
       
   120 			print channel + ': Adding item to list.' 
       
   121 	RSS_CACHE['channels'][channel]['lastitem'] = items[0]
       
   122 	rss_update_file()
       
   123 
       
   124 def rss_dispatch_headline(channel, item):
       
   125 	global RSS_CACHE
       
   126 	globaltitle = RSS_CACHE['channels'][channel]['title']
       
   127 	title = rss_remove_html(item['title'])
       
   128 	link = item['link']
       
   129 	description = rss_remove_html(item['description'])
       
   130 	reply = title + ' - '
       
   131 	if description:
       
   132 		reply += description + ' - '
       
   133 	reply += link
       
   134 	for groupchat in RSS_CACHE['channels'][channel]['subscribers']:
       
   135 		if GROUPCHATS.has_key(groupchat):
       
   136 			print channel + ': Sending Headline To: ' + groupchat
       
   137 			msg(groupchat, reply)
       
   138 
       
   139 ################################################################################
       
   140 
       
   141 class RSSHandler(handler.ContentHandler):
       
   142 	def __init__(self, channel):
       
   143 		handler.ContentHandler.__init__(self)
       
   144 
       
   145 		self.channel = channel
       
   146 		self.info = {'title': '', 'link': '', 'description': ''}
       
   147 		self.items = []
       
   148 
       
   149 		self._text = ''
       
   150 		self._parent = None
       
   151 		self._title = ''
       
   152 		self._link = ''
       
   153 		self._description = ''
       
   154 
       
   155 	def startElement(self, name, attrs):
       
   156 		if name == 'channel' or name == 'item':
       
   157 			self._parent = name
       
   158 		self._text = ''
       
   159 
       
   160 	def endElement(self, name):
       
   161 		if self._parent == 'channel':
       
   162 			if name == 'title':
       
   163 				self.info['title'] = self._text
       
   164 			elif name == 'description':
       
   165 				self.info['description'] = self._text
       
   166 			elif name == 'link':
       
   167 				self.info['link'] = self._text
       
   168 
       
   169 		elif self._parent == 'item':
       
   170 			if name == 'title':
       
   171 				self._title = self._text
       
   172 			elif name == 'link':
       
   173 				self._link = self._text
       
   174 			elif name == 'description':
       
   175 				self._description = self._text
       
   176 			elif name == 'item':
       
   177 				self.items.append({'title': self._title, 'link': self._link, 'description': self._description})
       
   178 				self._title = ''
       
   179 				self._link = ''
       
   180 				self._description = ''
       
   181 
       
   182 		if name == 'rss' or name == 'rdf:RDF':
       
   183 			rss_dispatch_headlines(self.channel, self.info, self.items)
       
   184 				
       
   185 	def characters(self, content):
       
   186 		self._text = self._text + content
       
   187 
       
   188 ################################################################################
       
   189 
       
   190 rss_read_file()
       
   191 
       
   192 ################################################################################
       
   193 
       
   194 def handler_rss_start(type, source, parameters):
       
   195 	thread.start_new(rss_query_channels_loop, ())
       
   196 	smsg(type, source, 'Enabled RSS')
       
   197 
       
   198 def handler_rss_stop(type, source, parameters):
       
   199 	rss_end_loop()
       
   200 	smsg(type, source, 'Disabled RSS')
       
   201 
       
   202 def handler_rss_add(type, source, parameters):
       
   203 	if len(string.split(parameters)) > 1:
       
   204 		(name, url) = string.split(parameters)
       
   205 		rss_add_channel(name, url)	
       
   206 		smsg(type, source, 'Added: ' + name + ' - ' + url)
       
   207 	else:
       
   208 		smsg(type, source, 'Invalid Syntax')
       
   209 
       
   210 def handler_rss_remove(type, source, parameters):
       
   211 	if len(string.split(parameters)) > 0:
       
   212 		name = parameters
       
   213 		rss_remove_channel(name)	
       
   214 		smsg(type, source, 'Removed: ' + name)
       
   215 	else:
       
   216 		smsg(type, source, 'Invalid Syntax')
       
   217 
       
   218 def handler_rss_subscribe(type, source, parameters):
       
   219 	if len(string.split(parameters)) > 1:
       
   220 		(name, jid) = string.split(parameters)
       
   221 		rss_subscribe(name, jid)	
       
   222 		smsg(type, source, 'Subscribed: ' + jid + ' to ' + name)
       
   223 	else:
       
   224 		smsg(type, source, 'Invalid Syntax')
       
   225 
       
   226 def handler_rss_unsubscribe(type, source, parameters):
       
   227 	if len(string.split(parameters)) > 1:
       
   228 		(name, jid) = string.split(parameters)
       
   229 		rss_unsubscribe(name, jid)	
       
   230 		smsg(type, source, 'Unsubscribed: ' + jid + ' from ' + name)
       
   231 	else:
       
   232 		smsg(type, source, 'Invalid Syntax')
       
   233 
       
   234 def handler_rss_info(type, source, parameters):
       
   235 	if parameters.strip():
       
   236 		name = parameters.strip()
       
   237 		message = name + ' - ' + RSS_CACHE['channels'][name]['url'] + ' - ' + RSS_CACHE['channels'][name]['title'] + ' - ' + RSS_CACHE['channels'][name]['link'] + ' - ' + RSS_CACHE['channels'][name]['description']
       
   238 		message += ' - Subscribers:'
       
   239 		for subscriber in RSS_CACHE['channels'][name]['subscribers']:
       
   240 			message += ' ' + subscriber
       
   241 		if not len(RSS_CACHE['channels'][name]['subscribers']):
       
   242 			message += 'NONE'
       
   243 		smsg(type, source, message)
       
   244 	else:
       
   245 		message = 'Channels:'
       
   246 		for channel in RSS_CACHE['channels'].keys():
       
   247 			message += ' ' + channel
       
   248 		smsg(type, source, message)
       
   249 
       
   250 register_command_handler(handler_rss_start, '!rss_start', 100, 'Enables the RSS headline feature.', '!rss_start', ['!rss_start'])
       
   251 register_command_handler(handler_rss_stop, '!rss_stop', 100, 'Disables the RSS headline feature.', '!rss_stop', ['!rss_stop'])
       
   252 register_command_handler(handler_rss_add, '!rss_add', 100, 'Adds an RSS channel.', '!rss_add <name> <url>', ['!rss_add slashdot http://www.slashdot.org/slashdot.rdf'])
       
   253 register_command_handler(handler_rss_remove, '!rss_remove', 100, 'Removes an RSS channel.', '!rss_add <name>', ['!rss_remove slashdot'])
       
   254 register_command_handler(handler_rss_subscribe, '!rss_subscribe', 100, 'Subscribes a channel to an RSS channel.', '!rss_subscribe <name> <jid>', ['!rss_subscribe slashdot jabber@conference.jabber.org'])
       
   255 register_command_handler(handler_rss_unsubscribe, '!rss_unsubscribe', 100, 'Unsubscribes a channel from an RSS channel.', '!rss_unsubscribe <name> <jid>', ['!rss_unsubscribe slashdot jabber@conference.jabber.org'])
       
   256 register_command_handler(handler_rss_info, '!rss_info', 0, 'Requests information on specified RSS channel or gets the list of channels.', '!rss_info [name]', ['!rss_info slashdot', '!rss_info'])
       
   257