|
1 #$ neutron_plugin 01 |
|
2 |
|
3 from xml.sax import make_parser, handler |
|
4 |
|
# Path of the on-disk channel cache; rss_update_file() stores str(RSS_CACHE)
# there and rss_read_file() loads it back.
RSS_CACHE_FILE = 'dynamic/RSS_CACHE.txt'

# Minimum gap between two full polling passes. It is multiplied by 60 and
# compared against time.time(), so the unit is minutes.
RSS_INTERVAL = 30

# Value handed to time.sleep() after each individual channel query (seconds).
RSS_QUERY_DELAY = 10

# Value handed to time.sleep() on every polling-loop iteration, before at
# most one queued headline is sent (seconds).
RSS_ITEM_DELAY = 120

# In-memory copy of the cache file; rebound by rss_read_file().
RSS_CACHE = {}

# time.time() of the most recent polling pass; 0 lets the first pass run
# immediately.
last_query = 0

# Queue of (channel, item) pairs that have been fetched but not yet sent.
UNSENT_HEADLINES = []

# Flag associated with the polling loop (0 = off, 1 = on).
RSS_IS_ENABLED = 0

# initialize_file is defined outside this file; presumably it creates the
# cache file with this default content when it is missing -- TODO confirm.
initialize_file(RSS_CACHE_FILE, "{'channels': {}}")
|
16 |
|
17 ################################################################################ |
|
18 |
|
19 import re |
|
def rss_remove_html(text):
    """Return ``text`` with markup stripped and common entities decoded.

    Steps, in order:

    1. Remove anything that looks like a literal tag (``<...>``).
    2. Decode ``&lt;`` / ``&gt;`` so escaped markup becomes visible.
    3. Decode ``&amp;``, then ``&lt;`` / ``&gt;`` again (this catches
       double-escaped input such as ``&amp;lt;``), then ``&quot;``.
    4. Drop paragraph tags that only appeared after decoding.

    The entity names had been lost from the string literals, which turned
    every ``replace`` call into a no-op (``replace('<', '<')``); they are
    restored here.

    Args:
        text: headline title or description taken from a feed.

    Returns:
        The cleaned string.
    """
    # Literal tags first, so decoded text is not mistaken for markup below.
    text = re.compile('<[^>]*>').sub('', text)

    notags = text.replace('&lt;', '<').replace('&gt;', '>')
    noescape = (notags.replace('&amp;', '&')
                .replace('&lt;', '<')
                .replace('&gt;', '>')
                .replace('&quot;', '"'))

    # Only paragraph tags are removed after decoding; other decoded markup
    # is deliberately left in place, as before.
    for paragraph_tag in ('<p>', '</p>', '<p />', '<p/>'):
        noescape = noescape.replace(paragraph_tag, '')
    return noescape
|
29 |
|
30 """ OLD CODE: REMOVE LATER IF NEW FUNCTION (added 2005-10-12) WORKS |
|
31 def rss_remove_html(text): |
|
32 notags = text.replace('<', '<').replace('>', '>') |
|
33 noescape = notags.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"') |
|
34 noescape = noescape.replace('<p>', '') |
|
35 noescape = noescape.replace('</p>', '') |
|
36 noescape = noescape.replace('<p />', '').replace('<p/>', '') |
|
37 return noescape |
|
38 """ |
|
39 |
|
def rss_update_file():
    """Persist the in-memory cache by writing str(RSS_CACHE) to RSS_CACHE_FILE."""
    serialized = str(RSS_CACHE)
    write_file(RSS_CACHE_FILE, serialized)
|
43 |
|
def rss_read_file():
    """Rebind RSS_CACHE to the value stored in RSS_CACHE_FILE."""
    global RSS_CACHE
    raw = read_file(RSS_CACHE_FILE)
    # NOTE(review): eval() runs whatever expression the cache file holds.
    # The file is written by rss_update_file() as str(dict); if it can ever
    # be edited by someone else, switch to a literal-only parser.
    RSS_CACHE = eval(raw)
|
47 |
|
def rss_add_channel(name, url):
    """Register a channel, or update the URL of an existing one.

    A new channel starts with no subscribers, an empty ``lastitem`` and
    ``link``, and uses ``name`` as its provisional title and description.
    For an existing channel only ``url`` is replaced.

    Args:
        name: short channel identifier used as the cache key.
        url: feed location handed to the SAX parser when polling.
    """
    channels = RSS_CACHE['channels']
    if name in channels:
        channels[name]['url'] = url
    else:
        channels[name] = {
            'url': url,
            'lastitem': '',
            'subscribers': [],
            'title': name,
            'link': '',
            'description': name,
        }
    # Save in both cases: previously a URL change on an existing channel was
    # kept only in memory and silently lost on the next restart.
    rss_update_file()
|
55 |
|
def rss_remove_channel(name):
    """Delete channel ``name`` from the cache and save; no-op if unknown."""
    channels = RSS_CACHE['channels']
    if name not in channels:
        return
    del channels[name]
    rss_update_file()
|
61 |
|
def rss_subscribe(name, jid):
    """Add ``jid`` to the subscribers of channel ``name`` and save.

    Does nothing when the channel is unknown or ``jid`` is already
    subscribed.
    """
    channels = RSS_CACHE['channels']
    if name not in channels:
        return
    subscribers = channels[name]['subscribers']
    if jid in subscribers:
        return
    subscribers.append(jid)
    rss_update_file()
|
68 |
|
def rss_unsubscribe(name, jid):
    """Remove ``jid`` from the subscribers of channel ``name`` and save.

    Does nothing when the channel is unknown or ``jid`` is not subscribed.
    """
    channels = RSS_CACHE['channels']
    if name not in channels:
        return
    subscribers = channels[name]['subscribers']
    if jid not in subscribers:
        return
    subscribers.remove(jid)
    rss_update_file()
|
75 |
|
def rss_query_channels_loop():
    """Poll feeds and send queued headlines until rss_end_loop() is called.

    Each iteration runs rss_query_channels() (which polls only when its own
    interval has elapsed), sleeps RSS_ITEM_DELAY seconds, and then sends at
    most one headline picked at random from UNSENT_HEADLINES.
    """
    # Without this declaration the assignment below created a local that
    # rss_end_loop() could never reach, so the loop could not be stopped.
    global RSS_IS_ENABLED
    RSS_IS_ENABLED = 1
    while RSS_IS_ENABLED:
        rss_query_channels()
        time.sleep(RSS_ITEM_DELAY)
        if UNSENT_HEADLINES:
            # Shuffle before popping so the headline sent is a random one.
            random.shuffle(UNSENT_HEADLINES)
            channel, item = UNSENT_HEADLINES.pop()
            rss_dispatch_headline(channel, item)


def rss_end_loop():
    """Ask the polling loop to exit after its current iteration."""
    # Must be declared global; a plain assignment would only set a local.
    global RSS_IS_ENABLED
    RSS_IS_ENABLED = 0
|
88 |
|
def rss_query_channels():
    """Query every known channel if RSS_INTERVAL minutes have passed.

    Returns immediately when the previous pass started less than
    RSS_INTERVAL * 60 seconds ago. Otherwise records the start time and
    queries each channel in turn, sleeping RSS_QUERY_DELAY seconds after
    each one.
    """
    global last_query
    if time.time() <= last_query + (RSS_INTERVAL * 60):
        return

    print('Querying Channels')
    last_query = time.time()
    # Iterate over a snapshot of the names: a full pass spans several sleeps,
    # and a command handler adding or removing a channel meanwhile would
    # otherwise change the dict's size mid-iteration and raise RuntimeError.
    for channel in list(RSS_CACHE['channels']):
        rss_query_channel(channel)
        time.sleep(RSS_QUERY_DELAY)
    print('Finished Querying Headlines')
|
99 |
|
def rss_query_channel(channel):
    """Fetch and parse the feed of ``channel`` with a SAX parser.

    Results are delivered through RSSHandler, which is installed as the
    parser's content handler. Any ordinary failure (unknown channel,
    network error, malformed XML, error raised by the handler) is reported
    on stdout and otherwise ignored so one bad feed does not stop polling.

    Args:
        channel: key of the channel in RSS_CACHE['channels'].
    """
    print('Querying: "' + channel + '"')
    parser = make_parser()
    parser.setContentHandler(RSSHandler(channel))
    try:
        parser.parse(RSS_CACHE['channels'][channel]['url'])
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except:`` so that
        # SystemExit and KeyboardInterrupt still propagate.
        print('error parsing: ' + channel)
|
109 |
|
def rss_dispatch_headlines(channel, info, items):
    """Store fresh channel metadata and queue the items not seen before.

    Items are walked in feed order and appended to UNSENT_HEADLINES until
    one equals the channel's stored ``lastitem``. The first item of the
    feed then becomes the new ``lastitem`` and the cache is saved.

    Args:
        channel: key of the channel in RSS_CACHE['channels'].
        info: dict with 'title', 'link' and 'description' of the feed.
        items: list of item dicts ('title', 'link', 'description') in the
            order they appeared in the feed.
    """
    entry = RSS_CACHE['channels'][channel]
    entry['title'] = info['title']
    entry['link'] = info['link']
    entry['description'] = info['description']

    last_seen = entry['lastitem']
    for item in items:
        if item == last_seen:
            break
        UNSENT_HEADLINES.append((channel, item))
        print(channel + ': Adding item to list.')

    # A feed without items used to raise IndexError on items[0], which also
    # skipped the save below; keep the old marker in that case.
    if items:
        entry['lastitem'] = items[0]
    rss_update_file()
|
123 |
|
def rss_dispatch_headline(channel, item):
    """Send one headline to every subscribed groupchat the bot is in.

    The message is ``title - description - link`` (the description part is
    omitted when empty), with title and description passed through
    rss_remove_html(). Subscribers that are not keys of GROUPCHATS are
    skipped.

    Args:
        channel: key of the channel in RSS_CACHE['channels'].
        item: dict with 'title', 'link' and 'description'.
    """
    entry = RSS_CACHE['channels'].get(channel)
    if entry is None:
        # The channel was removed while this headline was still queued.
        # Raising KeyError here would terminate the polling thread.
        return

    title = rss_remove_html(item['title'])
    description = rss_remove_html(item['description'])

    parts = [title]
    if description:
        parts.append(description)
    parts.append(item['link'])
    reply = ' - '.join(parts)

    for groupchat in entry['subscribers']:
        if groupchat in GROUPCHATS:
            print(channel + ': Sending Headline To: ' + groupchat)
            msg(groupchat, reply)
|
138 |
|
139 ################################################################################ |
|
140 |
|
class RSSHandler(handler.ContentHandler):
    """SAX content handler that gathers one feed's metadata and items.

    Text is accumulated per element. When an element closes, its text is
    stored either as channel metadata or as a field of the item being
    built, depending on whether the most recently opened container was
    ``channel`` or ``item``. When the document root (``rss`` or
    ``rdf:RDF``) closes, everything collected is handed to
    rss_dispatch_headlines().
    """

    # Element names whose text is captured for both channels and items.
    _FIELDS = ('title', 'link', 'description')

    def __init__(self, channel):
        handler.ContentHandler.__init__(self)

        self.channel = channel
        self.info = {'title': '', 'link': '', 'description': ''}
        self.items = []

        # Parser state: text of the current element, last container seen,
        # and the fields of the item currently being assembled.
        self._text = ''
        self._parent = None
        self._title = ''
        self._link = ''
        self._description = ''

    def startElement(self, name, attrs):
        """Note container elements and start a fresh text buffer."""
        if name in ('channel', 'item'):
            self._parent = name
        self._text = ''

    def endElement(self, name):
        """File the buffered text under the channel or the current item."""
        if self._parent == 'channel':
            if name in self._FIELDS:
                self.info[name] = self._text

        elif self._parent == 'item':
            if name in self._FIELDS:
                setattr(self, '_' + name, self._text)
            elif name == 'item':
                finished = {
                    'title': self._title,
                    'link': self._link,
                    'description': self._description,
                }
                self.items.append(finished)
                self._title = ''
                self._link = ''
                self._description = ''

        # Closing the root element means the whole feed has been read.
        if name in ('rss', 'rdf:RDF'):
            rss_dispatch_headlines(self.channel, self.info, self.items)

    def characters(self, content):
        """Append a chunk of character data to the current text buffer."""
        self._text += content
|
187 |
|
188 ################################################################################ |
|
189 |
|
# Load the saved channel cache into RSS_CACHE when the plugin is loaded.
rss_read_file()
|
191 |
|
192 ################################################################################ |
|
193 |
|
def handler_rss_start(type, source, parameters):
    """Command handler: start the polling loop in a new thread and confirm."""
    loop_args = ()
    thread.start_new(rss_query_channels_loop, loop_args)
    smsg(type, source, 'Enabled RSS')
|
197 |
|
def handler_rss_stop(type, source, parameters):
    """Command handler: call rss_end_loop() and confirm to the requester."""
    rss_end_loop()
    confirmation = 'Disabled RSS'
    smsg(type, source, confirmation)
|
201 |
|
def handler_rss_add(type, source, parameters):
    """Command handler for ``!rss_add <name> <url>``.

    Replies 'Invalid Syntax' unless exactly two whitespace-separated
    arguments are given. Previously three or more arguments passed the
    length check and then raised ValueError on unpacking.
    """
    args = parameters.split()
    if len(args) != 2:
        smsg(type, source, 'Invalid Syntax')
        return
    name, url = args
    rss_add_channel(name, url)
    smsg(type, source, 'Added: ' + name + ' - ' + url)
|
209 |
|
def handler_rss_remove(type, source, parameters):
    """Command handler for ``!rss_remove <name>``.

    Replies 'Invalid Syntax' when no name is given. The name is stripped
    of surrounding whitespace first; before, input such as ``'slashdot '``
    was looked up verbatim and never matched the stored channel.
    """
    name = parameters.strip()
    if not name:
        smsg(type, source, 'Invalid Syntax')
        return
    rss_remove_channel(name)
    smsg(type, source, 'Removed: ' + name)
|
217 |
|
def handler_rss_subscribe(type, source, parameters):
    """Command handler for ``!rss_subscribe <name> <jid>``.

    Replies 'Invalid Syntax' unless exactly two whitespace-separated
    arguments are given. Previously three or more arguments passed the
    length check and then raised ValueError on unpacking.
    """
    args = parameters.split()
    if len(args) != 2:
        smsg(type, source, 'Invalid Syntax')
        return
    name, jid = args
    rss_subscribe(name, jid)
    smsg(type, source, 'Subscribed: ' + jid + ' to ' + name)
|
225 |
|
def handler_rss_unsubscribe(type, source, parameters):
    """Command handler for ``!rss_unsubscribe <name> <jid>``.

    Replies 'Invalid Syntax' unless exactly two whitespace-separated
    arguments are given. Previously three or more arguments passed the
    length check and then raised ValueError on unpacking.
    """
    args = parameters.split()
    if len(args) != 2:
        smsg(type, source, 'Invalid Syntax')
        return
    name, jid = args
    rss_unsubscribe(name, jid)
    smsg(type, source, 'Unsubscribed: ' + jid + ' from ' + name)
|
233 |
|
def handler_rss_info(type, source, parameters):
    """Command handler for ``!rss_info [name]``.

    With a name: replies with the channel's name, url, title, link,
    description and subscriber list. Without one: replies with the names
    of all channels. An unknown name now gets an explicit reply instead of
    raising KeyError.
    """
    channels = RSS_CACHE['channels']
    name = parameters.strip()

    if not name:
        listing = 'Channels:' + ''.join([' ' + key for key in channels.keys()])
        smsg(type, source, listing)
        return

    entry = channels.get(name)
    if entry is None:
        smsg(type, source, 'Unknown channel: ' + name)
        return

    message = ' - '.join([name, entry['url'], entry['title'],
                          entry['link'], entry['description']])
    message += ' - Subscribers:'
    subscribers = entry['subscribers']
    if subscribers:
        message += ''.join([' ' + subscriber for subscriber in subscribers])
    else:
        message += 'NONE'
    smsg(type, source, message)
|
249 |
|
# Command registrations. Arguments as passed here: handler, command name, a
# numeric level (100 for the management commands, 0 for !rss_info --
# presumably the access level required; confirm against the bot core),
# description, usage string, list of examples.
register_command_handler(handler_rss_start, '!rss_start', 100, 'Enables the RSS headline feature.', '!rss_start', ['!rss_start'])
register_command_handler(handler_rss_stop, '!rss_stop', 100, 'Disables the RSS headline feature.', '!rss_stop', ['!rss_stop'])
register_command_handler(handler_rss_add, '!rss_add', 100, 'Adds an RSS channel.', '!rss_add <name> <url>', ['!rss_add slashdot http://www.slashdot.org/slashdot.rdf'])
# Usage string corrected: it previously read '!rss_add <name>'.
register_command_handler(handler_rss_remove, '!rss_remove', 100, 'Removes an RSS channel.', '!rss_remove <name>', ['!rss_remove slashdot'])
register_command_handler(handler_rss_subscribe, '!rss_subscribe', 100, 'Subscribes a channel to an RSS channel.', '!rss_subscribe <name> <jid>', ['!rss_subscribe slashdot jabber@conference.jabber.org'])
register_command_handler(handler_rss_unsubscribe, '!rss_unsubscribe', 100, 'Unsubscribes a channel from an RSS channel.', '!rss_unsubscribe <name> <jid>', ['!rss_unsubscribe slashdot jabber@conference.jabber.org'])
register_command_handler(handler_rss_info, '!rss_info', 0, 'Requests information on specified RSS channel or gets the list of channels.', '!rss_info [name]', ['!rss_info slashdot', '!rss_info'])
|
257 |