mod_anti_spam/mod_anti_spam.lua
author Matthew Wild <mwild1@gmail.com>
Tue, 05 Mar 2024 18:26:29 +0000
changeset 5863 259ffdbf8906
permissions -rw-r--r--
mod_anti_spam: New module for spam filtering (pre-alpha)

local ip = require "util.ip";
local jid_bare = require "util.jid".bare;
local jid_split = require "util.jid".split;
local set = require "util.set";
local sha256 = require "util.hashes".sha256;
local st = require"util.stanza";
local is_contact_subscribed = require "core.rostermanager".is_contact_subscribed;
local full_sessions = prosody.full_sessions;

local user_exists = require "core.usermanager".user_exists;

local new_rtbl_subscription = module:require("rtbl").new_rtbl_subscription;
local trie = module:require("trie");

-- Blocklists consulted by the is_spammy_* checks. They start out empty and are
-- filled/pruned by the RTBL subscription callbacks registered later in this file.

-- Domains flagged as spam sources; compared against session.from_host.
local spam_source_domains = set.new();
-- Subnets flagged as spam sources; compared against the parsed IP of the originating session.
local spam_source_ips = trie.new();
-- Items of the "spam_source_jids_sha256" list; looked up using sha256() of the sender's JID.
local spam_source_jids = set.new();

-- Counter bumped once per stanza that block_spam() acts on, labelled with the block reason.
local count_spam_blocked = module:metric("counter", "anti_spam_blocked", "stanzas", "Stanzas blocked as spam", {"reason"});

-- Record a spam verdict on a stanza event and carry out the requested action.
--
-- The verdict is stored on the event (event.spam_reason / event.spam_action) and
-- a "spam-blocked" event is fired so other modules can inspect it. If a handler
-- of that event returns false, the stanza is let through.
--
-- Parameters:
--   event  - the stanza event being processed (uses event.stanza and event.origin)
--   reason - short identifier for why the stanza was flagged; also the metric label
--   action - "bounce" sends an error reply to the origin, anything else discards silently
--
-- Returns true when the stanza was blocked, nothing when another module allowed it.
function block_spam(event, reason, action)
	event.spam_reason = reason;
	event.spam_action = action;
	if module:fire_event("spam-blocked", event) == false then
		module:log("debug", "Spam allowed by another module");
		return;
	end

	count_spam_blocked:with_labels(reason):add(1);

	local stanza = event.stanza;
	if action == "bounce" then
		module:log("debug", "Bouncing likely spam %s from %s (%s)", stanza.name, stanza.attr.from, reason);
		-- error_reply() builds the reply from the stanza being rejected, so the
		-- original stanza has to be passed as the first argument.
		event.origin.send(st.error_reply(stanza, "cancel", "policy-violation", "Rejected as spam"));
	else
		module:log("debug", "Discarding likely spam %s from %s (%s)", stanza.name, stanza.attr.from, reason);
	end

	return true;
end

-- Decide whether the stanza in `event` reaches its recipient from someone the
-- recipient has no subscription relationship with.
--
-- Parameters:
--   from_jid - bare JID of the sender
--   event    - stanza event; only event.stanza (its `to` and `type` attributes) is read
--
-- Returns true for a stranger, false when the stanza is explicitly passed
-- through, and nothing when the sender is a subscribed contact.
function is_from_stranger(from_jid, event)
	local msg = event.stanza;
	local node, host, resource = jid_split(msg.attr.to);

	-- Not addressed to a user account
	if not node then
		return false;
	end

	-- Addressed to a full JID that currently has a session
	if full_sessions[msg.attr.to] then
		return false;
	end

	-- Known contact: fall out without a value, as callers only test truthiness
	if is_contact_subscribed(node, host, from_jid) then
		return;
	end

	-- Users may always message their own account
	local own_jid = node.."@"..host;
	if from_jid == own_jid then
		return false;
	end

	-- Groupchat traffic directed at a specific resource is let through
	if resource and msg.attr.type == "groupchat" then
		return false;
	end

	return true;
end

-- Check whether a stanza's originating session belongs to a known spam source.
--
-- Parameters:
--   session - the origin session; session.from_host and session.ip are consulted.
--             Either field may be absent.
--
-- Returns true when the session's domain or IP address is on a blocklist,
-- nothing otherwise.
function is_spammy_server(session)
	if spam_source_domains:contains(session.from_host) then
		return true;
	end

	-- Not every session carries an IP address; without this guard ip.new()
	-- would be handed nil and raise instead of simply not matching.
	local raw_ip = session.ip;
	if not raw_ip then
		return;
	end

	-- Treat an address that failed to parse as "not listed"
	local origin_ip = ip.new(raw_ip);
	if origin_ip and spam_source_ips:contains_ip(origin_ip) then
		return true;
	end
end

-- Check whether a sender is on the JID blocklist.
-- The list is keyed by hash, so the JID is hashed with sha256() before the lookup.
--
-- Parameters:
--   sender_jid - JID string of the sender
--
-- Returns the result of the blocklist membership test.
function is_spammy_sender(sender_jid)
	local jid_digest = sha256(sender_jid, true);
	return spam_source_jids:contains(jid_digest);
end

-- Content filters taken from the module configuration. Both are nil-checked
-- before use, so either one can be left out.
--   anti_spam_block_strings  - entries are searched for as plain substrings of the message body
--   anti_spam_block_patterns - entries are searched for as Lua patterns in the message body
local spammy_strings = module:get_option_array("anti_spam_block_strings");
local spammy_patterns = module:get_option_array("anti_spam_block_patterns");

-- Check a stanza's body text against the configured spam strings and patterns.
--
-- Parameters:
--   stanza - the stanza to inspect; only <message> stanzas are examined
--
-- Returns true if the body contains any configured string (plain substring
-- match) or matches any configured Lua pattern, nothing otherwise.
function is_spammy_content(stanza)
	-- Only support message content
	if stanza.name ~= "message" then return; end
	if not (spammy_strings or spammy_patterns) then return; end

	-- Plenty of legitimate messages have no <body> (chat states, receipts, ...);
	-- there is nothing to scan in those, and calling :find() on nil would raise.
	local body = stanza:get_child_text("body");
	if not body then return; end

	if spammy_strings then
		for _, s in ipairs(spammy_strings) do
			-- 4th argument `true`: literal search, no pattern magic
			if body:find(s, 1, true) then
				return true;
			end
		end
	end
	if spammy_patterns then
		for _, s in ipairs(spammy_patterns) do
			if body:find(s) then
				return true;
			end
		end
	end
end

-- Set up RTBLs
--
-- Every service listed in "anti_spam_services" is subscribed to three lists.
-- Items added to or removed from a list are mirrored into the matching local
-- blocklist: domains, IP subnets (CIDR notation) and hashed JIDs.

-- The option may be unset (e.g. when only string/pattern filtering is used);
-- fall back to an empty list so that ipairs() is not handed nil at load time.
local anti_spam_services = module:get_option_array("anti_spam_services") or {};

for _, rtbl_service_jid in ipairs(anti_spam_services) do
	new_rtbl_subscription(rtbl_service_jid, "spam_source_domains", {
		added = function (item)
			spam_source_domains:add(item);
		end;
		removed = function (item)
			spam_source_domains:remove(item);
		end;
	});
	new_rtbl_subscription(rtbl_service_jid, "spam_source_ips", {
		added = function (item)
			-- every value returned by parse_cidr() is forwarded to add_subnet()
			spam_source_ips:add_subnet(ip.parse_cidr(item));
		end;
		removed = function (item)
			spam_source_ips:remove_subnet(ip.parse_cidr(item));
		end;
	});
	new_rtbl_subscription(rtbl_service_jid, "spam_source_jids_sha256", {
		added = function (item)
			spam_source_jids:add(item);
		end;
		removed = function (item)
			spam_source_jids:remove(item);
		end;
	});
end

-- Filter messages addressed to a bare JID.
-- Only messages to existing local users from senders they have no subscription
-- relationship with are examined. Such a message is dropped when it comes from
-- a listed server or JID, or when its body matches the configured content filters.
module:hook("message/bare", function (event)
	local stanza = event.stanza;

	-- user_exists() takes the username and host separately, not a bare JID
	local to_user, to_host = jid_split(stanza.attr.to);
	if not to_user or not user_exists(to_user, to_host) then return; end

	local from_bare = jid_bare(stanza.attr.from);
	if not is_from_stranger(from_bare, event) then return; end

	if is_spammy_server(event.origin) then
		return block_spam(event, "known-spam-source", "drop");
	end

	if is_spammy_sender(from_bare) then
		return block_spam(event, "known-spam-jid", "drop");
	end

	if is_spammy_content(stanza) then
		return block_spam(event, "spam-content", "drop");
	end
end, 500);

-- Filter subscription requests addressed to a bare JID.
-- A request is dropped when it comes from a listed server or a listed JID.
module:hook("presence/bare", function (event)
	local stanza = event.stanza;

	-- The presence type is an attribute of the stanza. Reading stanza.type
	-- always yields nil, which made this handler return early for every presence.
	if stanza.attr.type ~= "subscribe" then
		return;
	end

	if is_spammy_server(event.origin) then
		return block_spam(event, "known-spam-source", "drop");
	end

	-- is_spammy_sender() hashes a JID string, so pass the sender's bare JID
	-- (as the message hook does) rather than the stanza object.
	if is_spammy_sender(jid_bare(stanza.attr.from)) then
		return block_spam(event, "known-spam-jid", "drop");
	end
end, 500);