author | Kim Alvefur <zash@zash.se> |
Thu, 29 Feb 2024 18:05:09 +0100 | |
changeset 5853 | 84a784bc4096 |
parent 5139 | 35085e0d52ad |
permissions | -rw-r--r-- |
4430
3fe2c264aac4
mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff
changeset
|
1 |
-- No, not trying to parse HTML here. It's an illusion. Just trying to read RSS feeds. |
3fe2c264aac4
mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff
changeset
|
2 |
-- |
3fe2c264aac4
mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff
changeset
|
3 |
-- Compose a textual representation of Atom payloads |
3fe2c264aac4
mod_pubsub_summary: Mangle HTML payloads in Atom/RSS feeds
Kim Alvefur <zash@zash.se>
parents:
diff
changeset
|
4 |
-- Named HTML entities decoded when flattening HTML content. Names that are
-- not listed here make the gsub lookup return nil, which leaves the original
-- "&name;" text untouched.
local html_entities = {
	apos = "'";
	quot = '"';
	lt = "<";
	gt = ">";
	amp = "&";
	nbsp = "\194\160"; -- U+00A0
};

-- Flatten an HTML fragment into plain text with light XEP-0393 style markup.
-- This is a chain of pattern substitutions, not an HTML parser.
local function html_to_text(html)
	local text = html;
	-- Paragraphs become blank-line separated blocks, list items become bullets.
	text = text:gsub("\n*<p[^>]*>\n*(.-)\n*</p>\n*", "%1\n\n");
	text = text:gsub("<li>(.-)</li>\n", "* %1\n");
	-- Park links behind the control bytes \1 \2 \3 so that the generic tag
	-- stripping below does not eat them; they are rendered afterwards.
	text = text:gsub("<a[^>]*href=[\"'](.-)[\"'][^>]*>(.-)</a>", "\1%1\2%2\3");
	text = text:gsub("<b>(.-)</b>", "*%1*");
	text = text:gsub("<strong>(.-)</strong>", "*%1*");
	text = text:gsub("<em>(.-)</em>", "_%1_");
	text = text:gsub("<i>(.-)</i>", "_%1_");
	text = text:gsub("<img[^>]*src=[\"'](.-)[\"'][^>]*>", " %1 "); -- TODO alt= would have been nice to grab
	text = text:gsub("<br[^>]*>", "\n");
	-- Drop every remaining tag, then render the parked links as "text <url>".
	text = text:gsub("<[^>]+>", "");
	text = text:gsub("\1(.-)\2(.-)\3", "%2 <%1>");
	-- Trim surrounding whitespace and collapse runs of three or more newlines.
	text = text:gsub("^%s*", ""):gsub("%s*$", "");
	text = text:gsub("\n\n\n+", "\n\n");
	-- Entities are decoded last and in a single pass, so "&amp;lt;" ends up as
	-- "&lt;" rather than being decoded twice.
	text = text:gsub("&(%w+);", html_entities);
	return text;
end

-- Append a line to an optional summary, starting a new one when it is nil.
local function append_line(summary, line)
	if summary then
		return summary .. "\n" .. line;
	end
	return line;
end

-- Build a plain text summary of an Atom payload.
--
-- Reads event.payload and returns a string made of the (bold) title, the
-- content or summary text, one line per <link> href and one geo: URI per CAP
-- <area><circle>. Returns nil when none of those are present.
module:hook("pubsub-summary/http://www.w3.org/2005/Atom", function (event)
	local payload = event.payload;
	local title = payload:get_child_text("title");
	if title then title = title:gsub("^%s+", ""):gsub("%s+$", ""); end
	-- Note: This prefers content over summary, it was made for a news feed where
	-- the interesting stuff was in the content and the summary was .. meh.
	local content_tag = payload:get_child("content") or payload:get_child("summary");
	local content = content_tag and content_tag:get_text();
	if content and content_tag.attr.type == "html" then
		content = html_to_text(content);
	end

	local summary;
	if title and content and content:sub(1, #title) ~= title then
		-- Only prepend the title when the content does not already start with it.
		summary = "*" .. title .. "*\n\n" .. content;
	elseif title or content then
		summary = content or title;
	end

	for link in payload:childtags("link") do
		local href = link.attr.href;
		-- Skip links whose target is already the whole content.
		if href and href ~= content then
			summary = append_line(summary, href);
			local rel = link.attr.rel;
			if rel and rel ~= "alternate" then summary = summary .. " [" .. rel .. "]" end
		end
	end

	for area in payload:childtags("area", "urn:oasis:names:tc:emergency:cap:1.2") do
		local pos = area:get_child_text("circle");
		-- Keep only the leading "lat,lon" pair, allowing a minus sign on either
		-- coordinate, and skip the area when no such pair is found.
		-- NOTE(review): assumes the CAP 1.2 "lat,lon radius" layout - confirm.
		local coords = pos and pos:match("%-?[%d.]+,%-?[%d.]+");
		if coords then
			-- summary may still be nil here if the entry had no title, content
			-- or links, so it must not be concatenated directly.
			summary = append_line(summary, "geo:" .. coords);
		end
	end
	return summary;
end, 1);