mod_pubsub_feed/mod_pubsub_feed.lua
author Kim Alvefur <zash@zash.se>
Sat, 27 Nov 2010 20:44:22 +0100
changeset 279 aa0df3db4901
parent 278 653c1826739e
child 299 801066bf5793
permissions -rw-r--r--
mod_pubsub_feed: Wrap entry in a item element.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
278
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     1
-- Fetches Atom feeds and publishes to PubSub nodes
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     2
--
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     3
-- Config:
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     4
-- Component "pubsub.example.com" "pubsub"
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     5
-- modules_enabled = {
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     6
--   "pubsub_feed";
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     7
-- }
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     8
-- feeds = { -- node -> url
279
aa0df3db4901 mod_pubsub_feed: Wrap entry in a item element.
Kim Alvefur <zash@zash.se>
parents: 278
diff changeset
     9
--   prosody_blog = "http://blog.prosody.im/feed/atom.xml";
278
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    10
-- }
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    11
-- feed_pull_interval = 20 -- minutes
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    12
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    13
local modules = hosts[module.host].modules;
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    14
if not modules.pubsub then
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    15
	module:log("warn", "Pubsub needs to be loaded on this host");
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    16
end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    17
local add_task = require "util.timer".add_task;
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    18
local date, time = os.date, os.time;
279
aa0df3db4901 mod_pubsub_feed: Wrap entry in a item element.
Kim Alvefur <zash@zash.se>
parents: 278
diff changeset
    19
local dt_parse, dt_datetime = require "util.datetime".parse, require "util.datetime".datetime;
278
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    20
local http = require "net.http";
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    21
local parse_feed = require "feeds".feed_from_string;
279
aa0df3db4901 mod_pubsub_feed: Wrap entry in a item element.
Kim Alvefur <zash@zash.se>
parents: 278
diff changeset
    22
local st = require "util.stanza";
278
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    23
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    24
local config = module:get_option("feeds") or {
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    25
	planet_jabber = "http://planet.jabber.org/atom.xml";
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    26
	prosody_blog = "http://blog.prosody.im/feed/atom.xml";
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    27
};
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    28
local refresh_interval = (module:get_option("feed_pull_interval") or 15) * 60;
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    29
local feed_list = { }
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    30
for node, url in pairs(config) do
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    31
	feed_list[node] = { url = url };
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    32
end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    33
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    34
local function update(item, callback)
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    35
	local headers = { };
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    36
	if item.data and item.last_update then
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    37
		headers["If-Modified-Since"] = date("!%a, %d %b %Y %T %Z", item.last_update);
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    38
	end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    39
	http.request(item.url, {headers = headers}, function(data, code, req) 
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    40
		if code == 200 then
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    41
			item.data = data;
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    42
			callback(item)
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    43
			item.last_update = time();
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    44
		end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    45
		if code == 304 then
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    46
			item.last_update = time();
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    47
		end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    48
	end);
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    49
end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    50
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    51
local actor = module.host.."/"..module.name;
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    52
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    53
local function refresh_feeds()
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    54
	for node, item in pairs(feed_list) do
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    55
		update(item, function(item)
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    56
			local feed = parse_feed(item.data);
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    57
			module:log("debug", "node: %s", node);
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    58
			for _, entry in ipairs(feed) do
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    59
				entry.attr.xmlns = "http://www.w3.org/2005/Atom";
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    60
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    61
				local e_published = entry:get_child("published");
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    62
				e_published = e_published and e_published[1];
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    63
				e_published = e_published and dt_parse(e_published);
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    64
				local e_updated = entry:get_child("updated");
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    65
				e_updated = e_updated and e_updated[1];
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    66
				e_updated = e_updated and dt_parse(e_updated);
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    67
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    68
				local timestamp = e_published or e_updated or nil;
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    69
				module:log("debug", "timestamp is %s, item.last_update is %s", tostring(timestamp), tostring(item.last_update));
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    70
				if not timestamp or not item.last_update or timestamp > item.last_update then
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    71
					local id = entry:get_child("id");
279
aa0df3db4901 mod_pubsub_feed: Wrap entry in a item element.
Kim Alvefur <zash@zash.se>
parents: 278
diff changeset
    72
					id = id[1] or item.url.."#"..dt_datetime(timestamp); -- Missing id, so make one up
aa0df3db4901 mod_pubsub_feed: Wrap entry in a item element.
Kim Alvefur <zash@zash.se>
parents: 278
diff changeset
    73
					local item = st.stanza("item", { id = id }):add_child(entry);
aa0df3db4901 mod_pubsub_feed: Wrap entry in a item element.
Kim Alvefur <zash@zash.se>
parents: 278
diff changeset
    74
278
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    75
					module:log("debug", "publishing to %s, id %s", node, id);
279
aa0df3db4901 mod_pubsub_feed: Wrap entry in a item element.
Kim Alvefur <zash@zash.se>
parents: 278
diff changeset
    76
					modules.pubsub.service:publish(node, actor, id, item)
278
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    77
				end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    78
			end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    79
		end);
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    80
	end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    81
	return refresh_interval;
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    82
end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    83
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    84
function init()
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    85
	add_task(0, refresh_feeds);
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    86
end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    87
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    88
if prosody.start_time then -- already started
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    89
	init();
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    90
else
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    91
	prosody.events.add_handler("server-started", init);
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    92
end
653c1826739e mod_pubsub_feed: Fetches Atom feeds and publishes to PubSub
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    93