mod_ogp/mod_ogp.lua
author Matthew Wild <mwild1@gmail.com>
Sat, 24 Sep 2022 09:26:26 +0100
changeset 5063 5f1120c284c5
parent 4602 09f0911c735d
permissions -rw-r--r--
mod_cloud_notify_extensions: Add note about dependency Noting here because people might not click through to see it on the mod_cloud_notify_encrypted page.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4256
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
     1
local mod_muc = module:depends("muc")
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
     2
local http = require "net.http"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
     3
local st = require "util.stanza"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
     4
local url_pattern = [[https?://%S+]]
4487
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
     5
local domain_pattern = '^%w+://([^/]+)'
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
     6
local xmlns_fasten = "urn:xmpp:fasten:0"
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
     7
local xmlns_xhtml = "http://www.w3.org/1999/xhtml"
4602
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
     8
local allowlist = module:get_option_set("ogp_domain_allowlist", module:get_option_set("ogp_domain_whitelist", {}))
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
     9
local denylist = module:get_option_set("ogp_domain_denylist", {})
4487
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
    10
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
    11
4602
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    12
local function is_allowed(domain)
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    13
	if allowlist:empty() then
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    14
		return true
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    15
	end
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    16
	if allowlist:contains(domain) then
4487
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
    17
		return true
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
    18
	end
4602
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    19
	return false
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    20
end
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    21
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    22
local function is_denied(domain)
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    23
	if denylist:empty() then
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    24
		return false
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    25
	end
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    26
	if denylist:contains(domain) then
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    27
		return true
4487
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
    28
	end
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
    29
	return false
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
    30
end
4256
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    31
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    32
4486
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
    33
local function fetch_ogp_data(room, url, origin_id)
4602
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    34
	if not url then
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    35
		return;
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    36
	end
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    37
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    38
	local domain = url:match(domain_pattern);
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    39
	if is_denied(domain) or not is_allowed(domain) then
4487
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
    40
		return;
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
    41
	end
4256
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    42
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    43
	http.request(
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    44
		url,
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    45
		nil,
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    46
		function(response_body, response_code, _)
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    47
			if response_code ~= 200 then
4508
0136c98f574c mod_ogp: Log error responses
JC Brand <jc@opkode.com>
parents: 4487
diff changeset
    48
				module:log("debug", "Call to %s returned code %s and body %s", url, response_code, response_body)
4256
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    49
				return
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    50
			end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    51
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    52
			local to = room.jid
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    53
			local from = room and room.jid or module.host
4463
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4259
diff changeset
    54
			local fastening = st.message({to = to, from = from, type = 'groupchat'}):tag("apply-to", {xmlns = xmlns_fasten, id = origin_id})
4256
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    55
			local found_metadata = false
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    56
			local message_body = ""
4259
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    57
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    58
			local meta_pattern = [[<meta (.-)/?>]]
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    59
			for match in response_body:gmatch(meta_pattern) do
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    60
				local property = match:match([[property=%s*["']?(og:.-)["']?%s]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    61
				if not property then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    62
					property = match:match([[property=["']?(og:.-)["']$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    63
				end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    64
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    65
				local content = match:match([[content=%s*["'](.-)["']%s]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    66
				if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    67
					content = match:match([[content=["']?(.-)["']$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    68
				end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    69
				if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    70
					content = match:match([[content=(.-) property]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    71
				end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    72
				if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    73
					content = match:match([[content=(.-)$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    74
				end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    75
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    76
				if property and content then
4508
0136c98f574c mod_ogp: Log error responses
JC Brand <jc@opkode.com>
parents: 4487
diff changeset
    77
					module:log("debug", property .. "\t" .. content)
4259
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    78
					fastening:tag(
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    79
						"meta",
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    80
						{
4463
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4259
diff changeset
    81
							xmlns = xmlns_xhtml,
4259
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    82
							property = property,
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    83
							content = content
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    84
						}
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    85
					):up()
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    86
					found_metadata = true
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    87
					message_body = message_body .. property .. "\t" .. content .. "\n"
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    88
				end
4256
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    89
			end
4259
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    90
4256
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    91
			if found_metadata then
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    92
				mod_muc.get_room_from_jid(room.jid):broadcast_message(fastening)
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    93
			end
4508
0136c98f574c mod_ogp: Log error responses
JC Brand <jc@opkode.com>
parents: 4487
diff changeset
    94
			module:log("debug", tostring(fastening))
4256
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    95
		end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    96
	)
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    97
end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    98
4486
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
    99
local function ogp_handler(event)
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   100
	local room, stanza = event.room, st.clone(event.stanza)
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   101
	local body = stanza:get_child_text("body")
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   102
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   103
	if not body then return; end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   104
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   105
	local origin_id = stanza:find("{urn:xmpp:sid:0}origin-id@id")
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   106
	if not origin_id then return; end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   107
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   108
	for url in body:gmatch(url_pattern) do
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   109
		fetch_ogp_data(room, url, origin_id);
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   110
	end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   111
end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   112
4256
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
   113
module:hook("muc-occupant-groupchat", ogp_handler)
4463
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4259
diff changeset
   114
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4259
diff changeset
   115
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4259
diff changeset
   116
module:hook("muc-message-is-historic", function (event)
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4259
diff changeset
   117
	local fastening = event.stanza:get_child('apply-to', xmlns_fasten)
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4259
diff changeset
   118
	if fastening and fastening:get_child('meta', xmlns_xhtml) then
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4259
diff changeset
   119
		return true
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4259
diff changeset
   120
	end
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4259
diff changeset
   121
end);