mod_pubsub_summary/mod_pubsub_summary.lua
author Kim Alvefur <zash@zash.se>
Sun, 03 Mar 2024 11:23:40 +0100
changeset 5857 97c9b76867ca
parent 5139 35085e0d52ad
permissions -rw-r--r--
mod_log_ringbuffer: Detach event handlers on logging reload (thanks Menel) Otherwise the global event handlers accumulate, one added each time logging is reoladed, and each invocation of the signal or event triggers one dump of each created ringbuffer.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4430
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     1
-- No, not trying to parse HTML here. It's an illusion. Just trying to read RSS feeds.
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     2
--
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     3
-- Compose a textual representation of Atom payloads
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     4
module:hook("pubsub-summary/http://www.w3.org/2005/Atom", function (event)
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     5
	local payload = event.payload;
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
     6
	local title = payload:get_child_text("title");
5139
35085e0d52ad mod_pubsub_summary: Trim preceding and trailing whitespace from title
Kim Alvefur <zash@zash.se>
parents: 5135
diff changeset
     7
	if title then title = title:gsub("^%s+", ""):gsub("%s+$", ""); end
4439
a620bf249e63 mod_pubsub_summary: Explain why it picks content or summary in a comment
Kim Alvefur <zash@zash.se>
parents: 4430
diff changeset
     8
	-- Note: This prefers content over summary, it was made for a news feed where
a620bf249e63 mod_pubsub_summary: Explain why it picks content or summary in a comment
Kim Alvefur <zash@zash.se>
parents: 4430
diff changeset
     9
	-- the interesting stuff was in the content and the summary was .. meh.
4430
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    10
	local content_tag = payload:get_child("content") or payload:get_child("summary");
4511
86a97e7572b2 mod_pubsub_summary: Fix traceback when <content> not included
Kim Alvefur <zash@zash.se>
parents: 4442
diff changeset
    11
	local content = content_tag and content_tag:get_text();
86a97e7572b2 mod_pubsub_summary: Fix traceback when <content> not included
Kim Alvefur <zash@zash.se>
parents: 4442
diff changeset
    12
	if content and content_tag.attr.type == "html" then
4430
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    13
		content = content:gsub("\n*<p[^>]*>\n*(.-)\n*</p>\n*", "%1\n\n");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    14
		content = content:gsub("<li>(.-)</li>\n", "* %1\n");
4517
ade2064160e3 mod_pubsub_summary: Fix to not strip inline links
Kim Alvefur <zash@zash.se>
parents: 4511
diff changeset
    15
		content = content:gsub("<a[^>]*href=[\"'](.-)[\"'][^>]*>(.-)</a>", "\1%1\2%2\3");
4430
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    16
		content = content:gsub("<b>(.-)</b>", "*%1*");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    17
		content = content:gsub("<strong>(.-)</strong>", "*%1*");
4604
98864dffb231 mod_pubsub_summary: Fix conversion of emphasis _like this_ per XEP-0393
Kim Alvefur <zash@zash.se>
parents: 4517
diff changeset
    18
		content = content:gsub("<em>(.-)</em>", "_%1_");
98864dffb231 mod_pubsub_summary: Fix conversion of emphasis _like this_ per XEP-0393
Kim Alvefur <zash@zash.se>
parents: 4517
diff changeset
    19
		content = content:gsub("<i>(.-)</i>", "_%1_");
4430
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    20
		content = content:gsub("<img[^>]*src=[\"'](.-)[\"'][^>]*>", " %1 "); -- TODO alt= would have been nice to grab
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    21
		content = content:gsub("<br[^>]*>", "\n");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    22
		content = content:gsub("<[^>]+>", "");
4517
ade2064160e3 mod_pubsub_summary: Fix to not strip inline links
Kim Alvefur <zash@zash.se>
parents: 4511
diff changeset
    23
		content = content:gsub("\1(.-)\2(.-)\3", "%2 <%1>");
4430
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    24
		content = content:gsub("^%s*", ""):gsub("%s*$", "");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    25
		content = content:gsub("\n\n\n+", "\n\n");
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    26
		content = content:gsub("&(%w+);", {
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    27
				apos = "'";
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    28
				quot = '"';
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    29
				lt = "<";
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    30
				gt = ">";
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    31
				amp = "&";
4613
fcfe691d6322 mod_pubsub_summary: Use pre-escaped UTF-8 sequence for compat
Kim Alvefur <zash@zash.se>
parents: 4604
diff changeset
    32
				nbsp = "\194\160"; -- U+00A0
4430
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    33
			});
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    34
	end
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    35
	local summary;
4441
09657f758f53 mod_pubsub_summary: Skip adding title if already part of summary
Kim Alvefur <zash@zash.se>
parents: 4440
diff changeset
    36
	if title and content and content:sub(1, #title) ~= title then
4442
2bb11055e4bb mod_pubsub_summary: Make titles *bold* to stand out more
Kim Alvefur <zash@zash.se>
parents: 4441
diff changeset
    37
		summary = "*" .. title .. "*\n\n" .. content;
4430
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    38
	elseif title or content then
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    39
		summary = content or title;
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    40
	end
4440
07529dba102d mod_pubsub_summary: Include multiple links (e.g. podcast media)
Kim Alvefur <zash@zash.se>
parents: 4439
diff changeset
    41
	for link in payload:childtags("link") do
07529dba102d mod_pubsub_summary: Include multiple links (e.g. podcast media)
Kim Alvefur <zash@zash.se>
parents: 4439
diff changeset
    42
		if link and link.attr.href and link.attr.href ~= content then
07529dba102d mod_pubsub_summary: Include multiple links (e.g. podcast media)
Kim Alvefur <zash@zash.se>
parents: 4439
diff changeset
    43
			summary = (summary and summary .. "\n" or "") .. link.attr.href;
5133
cde38b7de04a mod_pubsub_summary: Hide link relation when value is "alternate"
Kim Alvefur <zash@zash.se>
parents: 4613
diff changeset
    44
			if link.attr.rel and link.attr.rel ~= "alternate" then summary = summary .. " [" .. link.attr.rel .. "]" end
4440
07529dba102d mod_pubsub_summary: Include multiple links (e.g. podcast media)
Kim Alvefur <zash@zash.se>
parents: 4439
diff changeset
    45
		end
4430
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    46
	end
5135
82e7251d4f52 mod_pubsub_summary: Render geo:-URI from OASIS emergency broadcasts
Kim Alvefur <zash@zash.se>
parents: 5133
diff changeset
    47
	for area in payload:childtags("area", "urn:oasis:names:tc:emergency:cap:1.2") do
82e7251d4f52 mod_pubsub_summary: Render geo:-URI from OASIS emergency broadcasts
Kim Alvefur <zash@zash.se>
parents: 5133
diff changeset
    48
		local pos = area:get_child_text("circle");
82e7251d4f52 mod_pubsub_summary: Render geo:-URI from OASIS emergency broadcasts
Kim Alvefur <zash@zash.se>
parents: 5133
diff changeset
    49
		if pos then
82e7251d4f52 mod_pubsub_summary: Render geo:-URI from OASIS emergency broadcasts
Kim Alvefur <zash@zash.se>
parents: 5133
diff changeset
    50
			summary = summary .. "\n" .. "geo:"..pos:match("[%d.,]+");
82e7251d4f52 mod_pubsub_summary: Render geo:-URI from OASIS emergency broadcasts
Kim Alvefur <zash@zash.se>
parents: 5133
diff changeset
    51
		end
82e7251d4f52 mod_pubsub_summary: Render geo:-URI from OASIS emergency broadcasts
Kim Alvefur <zash@zash.se>
parents: 5133
diff changeset
    52
	end
4430
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    53
	return summary;
3fe2c264aac4 mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff changeset
    54
end, 1);