mod_pubsub_feeds: Switch to use util.xml for parsing feeds and include RSS to Atom translation code from lua-feeds
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mod_pubsub_feeds/feeds.lib.lua Sun Mar 20 12:32:45 2016 +0100
@@ -0,0 +1,80 @@
+local st = require "util.stanza";
+-- RSS->Atom translator
+-- http://code.matthewwild.co.uk/lua-feeds/
+
+-- Helpers to translate item child elements
+local rss2atom = {};
+function rss2atom.title(atom_entry, tag) -- copies the RSS <title> text into an Atom <title>
+ atom_entry:tag("title"):text(tag:get_text()):up();
+end
+
+function rss2atom.link(atom_entry, tag) -- RSS <link> text becomes the href attribute of an Atom <link>
+ atom_entry:tag("link", { href = tag:get_text() }):up();
+end
+
+function rss2atom.author(atom_entry, tag) -- RSS <author> text is emitted as <author><email>...</email></author>
+ atom_entry:tag("author")
+ :tag("email"):text(tag:get_text()):up()
+ :up();
+end
+
+function rss2atom.guid(atom_entry, tag) -- RSS <guid> text becomes the Atom <id>
+ atom_entry:tag("id"):text(tag:get_text()):up();
+end
+
+function rss2atom.category(atom_entry, tag) -- text -> term attribute, RSS "domain" attribute -> scheme attribute
+ atom_entry:tag("category", { term = tag:get_text(), scheme = tag.attr.domain }):up();
+end
+
+function rss2atom.description(atom_entry, tag) -- RSS <description> text becomes the Atom <summary>
+ atom_entry:tag("summary"):text(tag:get_text()):up();
+end
+
+local months = { -- lowercase three-letter month abbreviation -> two-digit month number
+ jan = "01", feb = "02", mar = "03", apr = "04", may = "05", jun = "06";
+ jul = "07", aug = "08", sep = "09", oct = "10", nov = "11", dec = "12";
+};
+
+-- Convert an RSS <pubDate> (RFC 822 style, e.g. "Sun, 20 Mar 2016 12:32:45 +0100")
+-- into an Atom <published> timestamp. Adds nothing when the text cannot be parsed.
+function rss2atom.pubDate(atom_entry, tag)
+	local text = tag:get_text();
+	if not text then return; end -- nothing to parse
+	local pubdate = text:gsub("^%a+,", ""):gsub("^%s*", ""); -- drop the optional "Day," prefix
+	local date, month, year, hour, minute, second, zone =
+		pubdate:match("^(%d%d?) (%a+) (%d+) (%d+):(%d+):?(%d*) ?(.*)$");
+	if not date then return; end
+	month = months[month:sub(1,3):lower()];
+	if not month then return; end -- unknown month name, would yield a bogus date
+	-- Zero-pad so every field has the fixed width the timestamp format needs
+	if #date == 1 then date = "0"..date; end
+	if #hour == 1 then hour = "0"..hour; end
+	if #second == 0 then second = "00"; end
+	if #year == 2 then -- GAH! two-digit year: above 80 means 19xx, otherwise 20xx
+		year = (tonumber(year) > 80 and "19" or "20")..year;
+	end
+	if zone == "UT" or zone == "GMT" then zone = "Z"; end
+	zone = zone:gsub("^([+%-]%d%d)(%d%d)$", "%1:%2"); -- RFC 822 +HHMM -> RFC 3339 +HH:MM
+	local date_string = string.format("%s-%s-%sT%s:%s:%s%s", year, month, date, hour, minute, second, zone);
+	atom_entry:tag("published"):text(date_string):up();
+end
+
+-- Translate a parsed RSS document into an Atom <feed>: every <item> under
+-- <channel> becomes its own <entry>. Without a <channel> the feed is returned empty.
+local function translate_rss(rss_feed)
+	local feed = st.stanza("feed", { xmlns = "http://www.w3.org/2005/Atom" });
+	local channel = rss_feed:get_child("channel");
+	if not channel then return feed; end
+	-- TODO channel properties
+	for item in channel:childtags("item") do
+		feed:tag("entry"); -- open a fresh entry; the translators append their children to it
+		for tag in item:childtags() do
+			local translator = rss2atom[tag.name];
+			if translator then translator(feed, tag); end
+		end
+		feed:reset(); -- back to the root so the next entry is a sibling, not a child
+	end
+	return feed;
+end
+
+return { translate_rss = translate_rss }
--- a/mod_pubsub_feeds/mod_pubsub_feeds.lua Fri Mar 18 09:59:42 2016 +0000
+++ b/mod_pubsub_feeds/mod_pubsub_feeds.lua Sun Mar 20 12:32:45 2016 +0100
@@ -1,7 +1,5 @@
-- Fetches Atom feeds and publishes to PubSub nodes
--
--- Depends: http://code.matthewwild.co.uk/lua-feeds
---
-- Config:
-- Component "pubsub.example.com" "pubsub"
-- modules_enabled = {
@@ -21,12 +19,23 @@
local dt_parse, dt_datetime = require "util.datetime".parse, require "util.datetime".datetime;
local uuid = require "util.uuid".generate;
local hmac_sha1 = require "util.hashes".hmac_sha1;
-local parse_feed = require "feeds".feed_from_string;
+local parse_xml = require "util.xml".parse; -- XML string -> stanza tree (or nil, err)
local st = require "util.stanza";
---local dump = require"util.serialization".serialize;
+local translate_rss = module:require("feeds").translate_rss;
local xmlns_atom = "http://www.w3.org/2005/Atom";
+-- Parse feed XML. Returns an Atom <feed> stanza (RSS gets translated), or nil plus an error.
+local function parse_feed(data)
+	local doc, parse_err = parse_xml(data);
+	if not doc then return doc, parse_err; end
+	local ns = doc.attr.xmlns;
+	if ns == xmlns_atom then return doc; end -- already Atom, use as-is
+	if ns == nil and doc.name == "rss" then return translate_rss(doc); end
+	-- neither Atom nor an un-namespaced <rss> root
+	return nil, "unsupported-format";
+end
+
local use_pubsubhubub = module:get_option_boolean("use_pubsubhubub", true);
if use_pubsubhubub then
module:depends"http";
@@ -75,7 +84,7 @@
local node = item.node;
module:log("debug", "parsing %d bytes of data in node %s", #item.data or 0, node)
local feed = parse_feed(item.data);
- for _, entry in ipairs(feed) do
+ for entry in feed:childtags("entry") do
entry.attr.xmlns = xmlns_atom;
local e_published = entry:get_child_text("published");
@@ -119,11 +128,12 @@
end
if use_pubsubhubub and not item.subscription then
--module:log("debug", "check if %s has a hub", item.node);
- local hub = item.hub or feed.links and feed.links.hub;
- if hub then
- item.hub = hub;
- module:log("debug", "%s has a hub: %s", item.node, item.hub);
- subscribe(item);
+ for link in feed:childtags("link") do
+ if link.attr.rel == "hub" then
+ item.hub = link.attr.href;
+ module:log("debug", "Node %s has a hub: %s", item.node, item.hub);
+ return subscribe(item);
+ end
end
end
end