mod_ogp/mod_ogp.lua
author Matthew Wild <mwild1@gmail.com>
Tue, 18 Jan 2022 17:01:18 +0000
changeset 4880 0f5f2d4475b9
parent 4602 09f0911c735d
permissions -rw-r--r--
mod_http_xep227: Add support for import via APIs rather than direct store manipulation In particular this transitions PEP nodes and data to be imported via mod_pep's APIs, fixing issues with importing at runtime while PEP data may already be live in RAM. Next obvious candidate for this approach is rosters, so clients get immediate roster pushes and other special handling (such as emitting subscribes to reach the desired subscription state).
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4256
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
     1
local mod_muc = module:depends("muc")
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
     2
local http = require "net.http"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
     3
local st = require "util.stanza"
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
     4
-- Lua pattern for URLs in a message body: "http://" or "https://" followed by
-- a run of non-whitespace characters.
local url_pattern = [[https?://%S+]]
4487
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
     5
-- Captures everything between "scheme://" and the first "/" of a URL. Note
-- that this is the whole authority part, so it may include userinfo and a port.
local domain_pattern = '^%w+://([^/]+)'
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
     6
local xmlns_fasten = "urn:xmpp:fasten:0"
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
     7
local xmlns_xhtml = "http://www.w3.org/1999/xhtml"
4602
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
     8
-- Set of domains read from the "ogp_domain_allowlist" option; when that option
-- is unset the older "ogp_domain_whitelist" option is used, and failing that
-- an empty set. is_allowed() treats an empty set as "no restriction".
local allowlist = module:get_option_set("ogp_domain_allowlist", module:get_option_set("ogp_domain_whitelist", {}))
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
     9
-- Set of domains read from the "ogp_domain_denylist" option (empty by default).
-- fetch_ogp_data() skips any URL whose domain is_denied() finds in this set.
local denylist = module:get_option_set("ogp_domain_denylist", {})
4487
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
    10
c4f11a4b5ac7 mod_ogp: Add the ability to whitelist domains
JC Brand <jc@opkode.com>
parents: 4486
diff changeset
    11
4602
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    12
--- Decide whether the allowlist permits fetching from a domain.
-- An empty allowlist places no restriction; otherwise the domain must be a
-- member of the set.
-- @param domain domain string extracted from the URL (may be nil)
-- @return true when fetching is permitted, false otherwise
local function is_allowed(domain)
	if allowlist:empty() or allowlist:contains(domain) then
		return true
	end
	return false
end
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    21
09f0911c735d mod_ogp: Add the ability to block OGP fetching for certain domains
JC Brand <jc@opkode.com>
parents: 4508
diff changeset
    22
--- Decide whether the denylist forbids fetching from a domain.
-- An empty denylist forbids nothing; otherwise the domain is denied only when
-- it is a member of the set.
-- @param domain domain string extracted from the URL (may be nil)
-- @return true when fetching is forbidden, false otherwise
local function is_denied(domain)
	if not denylist:empty() and denylist:contains(domain) then
		return true
	end
	return false
end
4256
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    31
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    32
4486
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
    33
--- Fetch a URL and broadcast any Open Graph metadata found in the response.
-- The metadata is sent to the room as a groupchat message carrying an
-- <apply-to/> fastening (urn:xmpp:fasten:0) that points at the originating
-- message, with one XHTML <meta/> child per og:* property found.
-- @param room MUC room object the originating message was sent to
-- @param url URL taken from the message body
-- @param origin_id origin-id of the message the metadata is attached to
local function fetch_ogp_data(room, url, origin_id)
	-- Nothing to fetch without a URL, and nowhere to broadcast without a room.
	if not url or not room then
		return;
	end

	local domain = url:match(domain_pattern);
	if not domain then
		-- No authority part (e.g. "https:///path"): there is no host to check or contact.
		return;
	end

	-- domain_pattern captures the whole authority, so "user@host", "host:443"
	-- or "HOST" would otherwise sidestep a denylist entry for "host". Check the
	-- bare lowercase host as well as the raw capture; the raw capture is still
	-- consulted so existing list entries keep matching exactly as before.
	local host = domain:gsub("^.*@", ""):gsub(":%d+$", ""):lower();
	if is_denied(domain) or is_denied(host) then
		return;
	end
	if not (is_allowed(domain) or is_allowed(host)) then
		return;
	end

	http.request(
		url,
		nil,
		function(response_body, response_code, _)
			if response_code ~= 200 then
				module:log("debug", "Call to %s returned code %s and body %s", url, response_code, response_body)
				return
			end

			local room_jid = room.jid
			local fastening = st.message({to = room_jid, from = room_jid, type = 'groupchat'})
				:tag("apply-to", {xmlns = xmlns_fasten, id = origin_id})
			local found_metadata = false

			local meta_pattern = [[<meta (.-)/?>]]
			for match in response_body:gmatch(meta_pattern) do
				-- Attribute order and quoting vary between sites, so try the
				-- "followed by whitespace" form first and the "last attribute" form second.
				local property = match:match([[property=%s*["']?(og:.-)["']?%s]])
				if not property then
					property = match:match([[property=["']?(og:.-)["']$]])
				end

				-- Same idea for content, with progressively looser fallbacks
				-- for unquoted values.
				local content = match:match([[content=%s*["'](.-)["']%s]])
				if not content then
					content = match:match([[content=["']?(.-)["']$]])
				end
				if not content then
					content = match:match([[content=(.-) property]])
				end
				if not content then
					content = match:match([[content=(.-)$]])
				end

				if property and content then
					-- Remote data must be passed as arguments, never as the format string.
					module:log("debug", "%s\t%s", property, content)
					fastening:tag(
						"meta",
						{
							xmlns = xmlns_xhtml,
							property = property,
							content = content
						}
					):up()
					found_metadata = true
				end
			end

			if found_metadata then
				-- The request is asynchronous; the room may be gone by now.
				local target_room = mod_muc.get_room_from_jid(room_jid)
				if target_room then
					target_room:broadcast_message(fastening)
				else
					module:log("debug", "Room %s is no longer available, dropping OGP data for %s", room_jid, url)
				end
			end
			module:log("debug", "%s", tostring(fastening))
		end
	)
end
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    98
4486
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
    99
--- Event handler that looks for URLs in a groupchat message body and starts
-- an Open Graph lookup for each of them.
-- Messages without a body or without an origin-id (urn:xmpp:sid:0) are ignored.
-- @param event event table providing `room` and `stanza`
local function ogp_handler(event)
	local muc_room = event.room
	local message = st.clone(event.stanza)

	-- Only look up the origin-id when there is a body to scan.
	local text = message:get_child_text("body")
	local message_id = text and message:find("{urn:xmpp:sid:0}origin-id@id")
	if not message_id then
		return
	end

	for link in text:gmatch(url_pattern) do
		fetch_ogp_data(muc_room, link, message_id)
	end
end
21698b960bd6 mod_ogp: Add the ability to detect and process multiple URLs in a body
JC Brand <jc@opkode.com>
parents: 4463
diff changeset
   112
4256
1327e1e1c94e mod_ogp: Add module
Seve Ferrer <seve@delape.net>
parents:
diff changeset
   113
-- Run ogp_handler for every "muc-occupant-groupchat" event.
module:hook("muc-occupant-groupchat", ogp_handler)
4463
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4259
diff changeset
   114
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4259
diff changeset
   115
dbfda7f5522d mod_ogp: Make sure OGP fasten messages get archived
JC Brand <jc@opkode.com>
parents: 4259
diff changeset
   116
-- Returns true for stanzas that carry an <apply-to/> fastening with at least
-- one XHTML <meta/> child (the shape fetch_ogp_data() broadcasts); returns
-- nothing for every other stanza.
module:hook("muc-message-is-historic", function (event)
	local apply_to = event.stanza:get_child('apply-to', xmlns_fasten)
	if not apply_to then
		return
	end
	if not apply_to:get_child('meta', xmlns_xhtml) then
		return
	end
	return true
end);