author | Matthew Wild <mwild1@gmail.com> |
Tue, 16 Apr 2024 12:58:08 +0100 | |
changeset 5889 | 54b451c3790c |
parent 4602 | 09f0911c735d |
permissions | -rw-r--r-- |
4256 | 1 |
local mod_muc = module:depends("muc") |
2 |
local http = require "net.http" |
|
3 |
local st = require "util.stanza" |
|
4 |
local url_pattern = [[https?://%S+]] |
|
4487
c4f11a4b5ac7
mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents:
4486
diff
changeset
|
5 |
local domain_pattern = '^%w+://([^/]+)' |
c4f11a4b5ac7
mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents:
4486
diff
changeset
|
6 |
local xmlns_fasten = "urn:xmpp:fasten:0" |
c4f11a4b5ac7
mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents:
4486
diff
changeset
|
7 |
local xmlns_xhtml = "http://www.w3.org/1999/xhtml" |
4602
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
8 |
local allowlist = module:get_option_set("ogp_domain_allowlist", module:get_option_set("ogp_domain_whitelist", {})) |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
9 |
local denylist = module:get_option_set("ogp_domain_denylist", {}) |
4487
c4f11a4b5ac7
mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents:
4486
diff
changeset
|
10 |
|
c4f11a4b5ac7
mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents:
4486
diff
changeset
|
11 |
|
4602
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
12 |
local function is_allowed(domain) |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
13 |
if allowlist:empty() then |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
14 |
return true |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
15 |
end |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
16 |
if allowlist:contains(domain) then |
4487
c4f11a4b5ac7
mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents:
4486
diff
changeset
|
17 |
return true |
c4f11a4b5ac7
mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents:
4486
diff
changeset
|
18 |
end |
4602
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
19 |
return false |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
20 |
end |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
21 |
|
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
22 |
local function is_denied(domain) |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
23 |
if denylist:empty() then |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
24 |
return false |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
25 |
end |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
26 |
if denylist:contains(domain) then |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
27 |
return true |
4487
c4f11a4b5ac7
mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents:
4486
diff
changeset
|
28 |
end |
c4f11a4b5ac7
mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents:
4486
diff
changeset
|
29 |
return false |
c4f11a4b5ac7
mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents:
4486
diff
changeset
|
30 |
end |
4256 | 31 |
|
32 |
||
4486
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
33 |
local function fetch_ogp_data(room, url, origin_id) |
4602
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
34 |
if not url then |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
35 |
return; |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
36 |
end |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
37 |
|
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
38 |
local domain = url:match(domain_pattern); |
09f0911c735d
mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents:
4508
diff
changeset
|
39 |
if is_denied(domain) or not is_allowed(domain) then |
4487
c4f11a4b5ac7
mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents:
4486
diff
changeset
|
40 |
return; |
c4f11a4b5ac7
mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents:
4486
diff
changeset
|
41 |
end |
4256 | 42 |
|
43 |
http.request( |
|
44 |
url, |
|
45 |
nil, |
|
46 |
function(response_body, response_code, _) |
|
47 |
if response_code ~= 200 then |
|
4508 | 48 |
module:log("debug", "Call to %s returned code %s and body %s", url, response_code, response_body) |
4256 | 49 |
return |
50 |
end |
|
51 |
||
52 |
local to = room.jid |
|
53 |
local from = room and room.jid or module.host |
|
4463
dbfda7f5522d
mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents:
4259
diff
changeset
|
54 |
local fastening = st.message({to = to, from = from, type = 'groupchat'}):tag("apply-to", {xmlns = xmlns_fasten, id = origin_id}) |
4256 | 55 |
local found_metadata = false |
56 |
local message_body = "" |
|
4259
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
57 |
|
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
58 |
local meta_pattern = [[<meta (.-)/?>]] |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
59 |
for match in response_body:gmatch(meta_pattern) do |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
60 |
local property = match:match([[property=%s*["']?(og:.-)["']?%s]]) |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
61 |
if not property then |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
62 |
property = match:match([[property=["']?(og:.-)["']$]]) |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
63 |
end |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
64 |
|
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
65 |
local content = match:match([[content=%s*["'](.-)["']%s]]) |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
66 |
if not content then |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
67 |
content = match:match([[content=["']?(.-)["']$]]) |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
68 |
end |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
69 |
if not content then |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
70 |
content = match:match([[content=(.-) property]]) |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
71 |
end |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
72 |
if not content then |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
73 |
content = match:match([[content=(.-)$]]) |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
74 |
end |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
75 |
|
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
76 |
if property and content then |
4508 | 77 |
module:log("debug", property .. "\t" .. content) |
4259
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
78 |
fastening:tag( |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
79 |
"meta", |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
80 |
{ |
4463
dbfda7f5522d
mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents:
4259
diff
changeset
|
81 |
xmlns = xmlns_xhtml, |
4259
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
82 |
property = property, |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
83 |
content = content |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
84 |
} |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
85 |
):up() |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
86 |
found_metadata = true |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
87 |
message_body = message_body .. property .. "\t" .. content .. "\n" |
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
88 |
end |
4256 | 89 |
end |
4259
38da10e4b593
mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents:
4258
diff
changeset
|
90 |
|
4256 | 91 |
if found_metadata then |
92 |
mod_muc.get_room_from_jid(room.jid):broadcast_message(fastening) |
|
93 |
end |
|
4508 | 94 |
module:log("debug", tostring(fastening)) |
4256 | 95 |
end |
96 |
) |
|
97 |
end |
|
98 |
||
4486
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
99 |
local function ogp_handler(event) |
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
100 |
local room, stanza = event.room, st.clone(event.stanza) |
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
101 |
local body = stanza:get_child_text("body") |
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
102 |
|
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
103 |
if not body then return; end |
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
104 |
|
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
105 |
local origin_id = stanza:find("{urn:xmpp:sid:0}origin-id@id") |
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
106 |
if not origin_id then return; end |
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
107 |
|
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
108 |
for url in body:gmatch(url_pattern) do |
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
109 |
fetch_ogp_data(room, url, origin_id); |
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
110 |
end |
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
111 |
end |
21698b960bd6
mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents:
4463
diff
changeset
|
112 |
|
4256 | 113 |
module:hook("muc-occupant-groupchat", ogp_handler) |
4463
dbfda7f5522d
mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents:
4259
diff
changeset
|
114 |
|
dbfda7f5522d
mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents:
4259
diff
changeset
|
115 |
|
dbfda7f5522d
mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents:
4259
diff
changeset
|
116 |
module:hook("muc-message-is-historic", function (event) |
dbfda7f5522d
mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents:
4259
diff
changeset
|
117 |
local fastening = event.stanza:get_child('apply-to', xmlns_fasten) |
dbfda7f5522d
mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents:
4259
diff
changeset
|
118 |
if fastening and fastening:get_child('meta', xmlns_xhtml) then |
dbfda7f5522d
mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents:
4259
diff
changeset
|
119 |
return true |
dbfda7f5522d
mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents:
4259
diff
changeset
|
120 |
end |
dbfda7f5522d
mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents:
4259
diff
changeset
|
121 |
end); |