# HG changeset patch
# User Matthew Wild
# Date 1709663189 0
# Node ID 259ffdbf89064f871b805b46fc74e6b7a8ec4121
# Parent  761142ee0ff2713f8d0a85d51b2d01ed8c6b58db
mod_anti_spam: New module for spam filtering (pre-alpha)

diff -r 761142ee0ff2 -r 259ffdbf8906 mod_anti_spam/mod_anti_spam.lua
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mod_anti_spam/mod_anti_spam.lua	Tue Mar 05 18:26:29 2024 +0000
@@ -0,0 +1,210 @@
+local ip = require "util.ip";
+local jid_bare = require "util.jid".bare;
+local jid_split = require "util.jid".split;
+local set = require "util.set";
+local sha256 = require "util.hashes".sha256;
+local st = require"util.stanza";
+local is_contact_subscribed = require "core.rostermanager".is_contact_subscribed;
+local full_sessions = prosody.full_sessions;
+
+local user_exists = require "core.usermanager".user_exists;
+
+local new_rtbl_subscription = module:require("rtbl").new_rtbl_subscription;
+local trie = module:require("trie");
+
+local spam_source_domains = set.new();
+local spam_source_ips = trie.new();
+local spam_source_jids = set.new();
+
+local count_spam_blocked = module:metric("counter", "anti_spam_blocked", "stanzas", "Stanzas blocked as spam", {"reason"});
+
+-- Handle a stanza that has been classified as spam.
+-- Records `reason` and `action` on the event and fires "spam-blocked"; another
+-- module may veto the block by returning false from that event. Otherwise the
+-- stanza is counted, then bounced (action == "bounce") or silently discarded.
+-- Returns true if the stanza was blocked, nothing if it was allowed through.
+function block_spam(event, reason, action)
+	event.spam_reason = reason;
+	event.spam_action = action;
+	if module:fire_event("spam-blocked", event) == false then
+		module:log("debug", "Spam allowed by another module");
+		return;
+	end
+
+	count_spam_blocked:with_labels(reason):add(1);
+
+	if action == "bounce" then
+		module:log("debug", "Bouncing likely spam %s from %s (%s)", event.stanza.name, event.stanza.attr.from, reason);
+		-- error_reply() takes the stanza being replied to as its first argument
+		event.origin.send(st.error_reply(event.stanza, "cancel", "policy-violation", "Rejected as spam"));
+	else
+		module:log("debug", "Discarding likely spam %s from %s (%s)", event.stanza.name, event.stanza.attr.from, reason);
+	end
+
+	return true;
+end
+
+-- Returns true if the stanza in `event` is addressed to a local user who has
+-- no subscription to `from_jid` (a bare JID). Stanzas addressed to a connected
+-- full JID, sent from the recipient's own bare JID, or of type "groupchat" to
+-- a resource are never treated as coming from a stranger.
+function is_from_stranger(from_jid, event)
+	local stanza = event.stanza;
+	local to_user, to_host, to_resource = jid_split(stanza.attr.to);
+
+	if not to_user then return false; end
+
+	local to_session = full_sessions[stanza.attr.to];
+	if to_session then return false; end
+
+	if not is_contact_subscribed(to_user, to_host, from_jid) then
+		-- Allow all messages from your own jid
+		if from_jid == to_user.."@"..to_host then
+			return false; -- Pass through
+		end
+		if to_resource and stanza.attr.type == "groupchat" then
+			return false; -- Pass through
+		end
+		return true; -- Stranger danger
+	end
+	return false;
+end
+
+-- Returns true if the session's remote domain or IP address is blocklisted.
+function is_spammy_server(session)
+	if spam_source_domains:contains(session.from_host) then
+		return true;
+	end
+	-- ip.new() cannot handle a missing address, so skip the IP lookup for
+	-- sessions that carry none
+	local origin_ip = session.ip and ip.new(session.ip);
+	if origin_ip and spam_source_ips:has_ip(origin_ip) then
+		return true;
+	end
+	return false;
+end
+
+-- Returns true if the SHA-256 hex digest of `sender_jid` is blocklisted.
+function is_spammy_sender(sender_jid)
+	return spam_source_jids:contains(sha256(sender_jid, true));
+end
+
+local spammy_strings = module:get_option_array("anti_spam_block_strings");
+local spammy_patterns = module:get_option_array("anti_spam_block_patterns");
+
+-- Returns true if the message body contains one of the literal strings from
+-- anti_spam_block_strings or matches one of the Lua patterns from
+-- anti_spam_block_patterns.
+function is_spammy_content(stanza)
+	-- Only support message content
+	if stanza.name ~= "message" then return; end
+	if not (spammy_strings or spammy_patterns) then return; end
+
+	local body = stanza:get_child_text("body");
+	-- Nothing to scan in messages without a <body/>
+	if not body then return; end
+
+	if spammy_strings then
+		for _, s in ipairs(spammy_strings) do
+			if body:find(s, 1, true) then
+				return true;
+			end
+		end
+	end
+	if spammy_patterns then
+		for _, s in ipairs(spammy_patterns) do
+			if body:find(s) then
+				return true;
+			end
+		end
+	end
+end
+
+-- Set up RTBLs
+
+-- Default to an empty list so the module still loads with no services configured
+local anti_spam_services = module:get_option_array("anti_spam_services", {});
+
+-- Parse an RTBL address entry ("ip" or "ip/bits") into an address object and a
+-- prefix length; a bare address counts as a single host. Returns nothing (and
+-- logs a warning) if the entry is not a valid address.
+local function parse_subnet(item)
+	local subnet_ip, bits = ip.parse_cidr(item);
+	if not subnet_ip then
+		module:log("warn", "Ignoring invalid RTBL address entry: %s", item);
+		return;
+	end
+	return subnet_ip, bits or #subnet_ip.packed * 8;
+end
+
+for _, rtbl_service_jid in ipairs(anti_spam_services) do
+	new_rtbl_subscription(rtbl_service_jid, "spam_source_domains", {
+		added = function (item)
+			spam_source_domains:add(item);
+		end;
+		removed = function (item)
+			spam_source_domains:remove(item);
+		end;
+	});
+	new_rtbl_subscription(rtbl_service_jid, "spam_source_ips", {
+		added = function (item)
+			local subnet_ip, bits = parse_subnet(item);
+			if subnet_ip then
+				spam_source_ips:add_subnet(subnet_ip, bits);
+			end
+		end;
+		removed = function (item)
+			local subnet_ip, bits = parse_subnet(item);
+			if subnet_ip then
+				spam_source_ips:remove_subnet(subnet_ip, bits);
+			end
+		end;
+	});
+	new_rtbl_subscription(rtbl_service_jid, "spam_source_jids_sha256", {
+		added = function (item)
+			spam_source_jids:add(item);
+		end;
+		removed = function (item)
+			spam_source_jids:remove(item);
+		end;
+	});
+end
+
+-- Filter messages sent to the bare JID of an existing local user by a stranger
+module:hook("message/bare", function (event)
+	local to_user, to_host = jid_split(event.stanza.attr.to);
+
+	-- user_exists() takes the username and host as separate arguments
+	if not (to_user and user_exists(to_user, to_host)) then return; end
+
+	local from_bare = jid_bare(event.stanza.attr.from);
+	if not is_from_stranger(from_bare, event) then return; end
+
+	if is_spammy_server(event.origin) then
+		return block_spam(event, "known-spam-source", "drop");
+	end
+
+	if is_spammy_sender(from_bare) then
+		return block_spam(event, "known-spam-jid", "drop");
+	end
+
+	if is_spammy_content(event.stanza) then
+		return block_spam(event, "spam-content", "drop");
+	end
+end, 500);
+
+-- Filter subscription requests from blocklisted servers and JIDs
+module:hook("presence/bare", function (event)
+	-- The stanza type is an attribute, not a field of the stanza table
+	if event.stanza.attr.type ~= "subscribe" then
+		return;
+	end
+
+	if is_spammy_server(event.origin) then
+		return block_spam(event, "known-spam-source", "drop");
+	end
+
+	if is_spammy_sender(jid_bare(event.stanza.attr.from)) then
+		return block_spam(event, "known-spam-jid", "drop");
+	end
+end, 500);
diff -r 761142ee0ff2 -r 259ffdbf8906 mod_anti_spam/rtbl.lib.lua
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mod_anti_spam/rtbl.lib.lua	Tue Mar 05 18:26:29 2024 +0000
@@ -0,0 +1,136 @@
+local array = require "util.array";
+local id = require "util.id";
+local it = require "util.iterators";
+local set = require "util.set";
+local st = require "util.stanza";
+
+module:depends("pubsub_subscription");
+
+-- Subscribe to the pubsub node `rtbl_node` on `rtbl_service_jid` and mirror
+-- the ids of its items locally.
+-- `handlers` may contain `added(id)` and `removed(id)` callbacks, called as
+-- entries appear in or disappear from the list. The full list is requested
+-- once via a zero-delay timer; later changes arrive as pubsub notifications.
+local function new_rtbl_subscription(rtbl_service_jid, rtbl_node, handlers)
+	local items = {};
+
+	local function notify(event_type, hash)
+		local handler = handlers[event_type];
+		if not handler then return; end
+		handler(hash);
+	end
+
+	-- Forget every known entry, notifying the "removed" handler for each
+	local function purge_all()
+		module:log("debug", "Purge all hashes from %s:%s", rtbl_service_jid, rtbl_node);
+		for hash in pairs(items) do
+			items[hash] = nil;
+			notify("removed", hash);
+		end
+	end
+
+	module:add_item("pubsub-subscription", {
+		service = rtbl_service_jid;
+		node = rtbl_node;
+
+		-- Callbacks:
+		on_subscribed = function()
+			module:log("info", "RTBL active: %s:%s", rtbl_service_jid, rtbl_node);
+		end;
+
+		on_error = function(err)
+			module:log(
+				"error",
+				"Failed to subscribe to RTBL: %s:%s %s::%s: %s",
+				rtbl_service_jid,
+				rtbl_node,
+				err.type,
+				err.condition,
+				err.text
+			);
+		end;
+
+		on_item = function(event)
+			local hash = event.item.attr.id;
+			if not hash then return; end
+			module:log("debug", "Received new hash from %s:%s: %s", rtbl_service_jid, rtbl_node, hash);
+			items[hash] = true;
+			notify("added", hash);
+		end;
+
+		on_retract = function (event)
+			local hash = event.item.attr.id;
+			if not hash then return; end
+			module:log("debug", "Retracted hash from %s:%s: %s", rtbl_service_jid, rtbl_node, hash);
+			items[hash] = nil;
+			notify("removed", hash);
+		end;
+
+		-- NOTE(review): the other callbacks all use an "on_" prefix, so the
+		-- original "purge" key was presumably never invoked; it is kept as an
+		-- alias in case anything relies on it -- confirm against
+		-- mod_pubsub_subscription
+		on_purge = purge_all;
+		purge = purge_all;
+	});
+
+	local request_id = "rtbl-request-"..id.short();
+
+	local function request_list()
+		local items_request = st.iq({ to = rtbl_service_jid, from = module.host, type = "get", id = request_id })
+			:tag("pubsub", { xmlns = "http://jabber.org/protocol/pubsub" })
+				:tag("items", { node = rtbl_node }):up()
+			:up();
+		module:send(items_request);
+	end
+
+	-- Reconcile the local list with a full items response from the service
+	local function update_list(event)
+		local from_jid = event.stanza.attr.from;
+		if from_jid ~= rtbl_service_jid then
+			module:log("debug", "Ignoring RTBL response from unknown sender: %s", from_jid);
+			return;
+		end
+		local items_el = event.stanza:find("{http://jabber.org/protocol/pubsub}pubsub/items");
+		if not items_el then
+			module:log("warn", "Invalid items response from RTBL service %s:%s", rtbl_service_jid, rtbl_node);
+			return;
+		end
+
+		local old_entries = set.new(array.collect(it.keys(items)));
+
+		local n_added, n_removed, n_total = 0, 0, 0;
+		for item in items_el:childtags("item") do
+			local hash = item.attr.id;
+			if hash then
+				n_total = n_total + 1;
+				if not old_entries:contains(hash) then
+					-- New entry
+					n_added = n_added + 1;
+					items[hash] = true;
+					notify("added", hash);
+				else
+					-- Entry already existed
+					old_entries:remove(hash);
+				end
+			end
+		end
+
+		-- Remove old entries that weren't in the received list
+		for hash in old_entries do
+			n_removed = n_removed + 1;
+			items[hash] = nil;
+			notify("removed", hash);
+		end
+
+		module:log("info", "%d RTBL entries received from %s:%s (%d added, %d removed)", n_total, from_jid, rtbl_node, n_added, n_removed);
+		return true;
+	end
+
+	module:hook("iq-result/host/"..request_id, update_list);
+	module:add_timer(0, request_list);
+end
+
+return {
+	new_rtbl_subscription = new_rtbl_subscription;
+}
diff -r 761142ee0ff2 -r 259ffdbf8906 mod_anti_spam/trie.lib.lua
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mod_anti_spam/trie.lib.lua	Tue Mar 05 18:26:29 2024 +0000
@@ -0,0 +1,203 @@
+local bit = require "util.bitcompat";
+
+local trie_methods = {};
+local trie_mt = { __index = trie_methods };
+
+local function new_node()
+	return {};
+end
+
+-- Store `value` under the byte string `item`, creating nodes as needed.
+function trie_methods:set(item, value)
+	local node = self.root;
+	for i = 1, #item do
+		local c = item:byte(i);
+		if not node[c] then
+			node[c] = new_node();
+		end
+		node = node[c];
+	end
+	node.terminal = true;
+	node.value = value;
+end
+
+-- Recursively clear the entry for `item` below `node` (`i` is the index of the
+-- next byte to follow). Returns `node`, or nil once it holds nothing and can
+-- be unlinked by the parent.
+local function _remove(node, item, i)
+	if i > #item then
+		if node.terminal then
+			node.terminal = nil;
+			node.value = nil;
+		end
+		if next(node) ~= nil then
+			return node;
+		end
+		return nil;
+	end
+	local c = item:byte(i);
+	local child = node[c];
+	local ret;
+	if child then
+		ret = _remove(child, item, i+1);
+		node[c] = ret;
+	end
+	if ret == nil and next(node) == nil then
+		return nil;
+	end
+	return node;
+end
+
+-- Remove the entry for `item`, pruning nodes that become empty.
+function trie_methods:remove(item)
+	return _remove(self.root, item, 1);
+end
+
+-- Look up `item`. Returns the stored value (nil if none) and the number of
+-- bytes of `item` that were matched. With `partial`, a lookup that runs off
+-- the trie returns the value of the last terminal node passed on the way.
+function trie_methods:get(item, partial)
+	local value;
+	local node = self.root;
+	local len = #item;
+	for i = 1, len do
+		if partial and node.terminal then
+			value = node.value;
+		end
+		local c = item:byte(i);
+		node = node[c];
+		if not node then
+			return value, i - 1;
+		end
+	end
+	return node.value, len;
+end
+
+-- Add `item` with the value `true`.
+function trie_methods:add(item)
+	return self:set(item, true);
+end
+
+-- Returns true if a value is stored for `item` (see get() for `partial`).
+function trie_methods:contains(item, partial)
+	return self:get(item, partial) ~= nil;
+end
+
+-- Returns the number of leading bytes of `item` that exist in the trie.
+function trie_methods:longest_prefix(item)
+	return select(2, self:get(item));
+end
+
+-- Add the subnet with address `item` (an object with a `.packed` byte string,
+-- as produced by util.ip) and prefix length `bits`. The address is stored
+-- truncated to the bytes covered by the prefix; the node's value is an
+-- ascending list of the prefix lengths registered for those bytes.
+function trie_methods:add_subnet(item, bits)
+	item = item.packed:sub(1, math.ceil(bits/8));
+	local existing = self:get(item);
+	if not existing then
+		existing = { bits };
+		return self:set(item, existing);
+	end
+
+	-- Keep the list of prefix lengths in ascending order
+	for i = 1, #existing do
+		local v = existing[i];
+		if v == bits then
+			return; -- Already in there
+		elseif v > bits then
+			table.insert(existing, i, bits);
+			return;
+		end
+	end
+	-- Longer than every prefix stored so far
+	table.insert(existing, bits);
+end
+
+-- Remove prefix length `bits` for the subnet at address `item`, dropping the
+-- trie entry once no prefix lengths remain for it.
+function trie_methods:remove_subnet(item, bits)
+	item = item.packed:sub(1, math.ceil(bits/8));
+	local existing = self:get(item);
+	if not existing then
+		return;
+	end
+
+	-- The list is sorted, so the search can stop at the first larger value
+	for i = 1, #existing do
+		local v = existing[i];
+		if v == bits then
+			table.remove(existing, i);
+			break;
+		elseif v > bits then
+			return; -- Stop search
+		end
+	end
+
+	if #existing == 0 then
+		self:remove(item);
+	end
+end
+
+-- Returns true if the address `item` (an object with a `.packed` byte string)
+-- falls inside any subnet added with add_subnet().
+-- NOTE(review): addresses are compared purely by their packed bytes, so IPv4
+-- and IPv6 entries presumably should not share one trie -- confirm.
+function trie_methods:has_ip(item)
+	item = item.packed;
+	local node = self.root;
+	local len = #item;
+	for i = 1, len do
+		if node.terminal then
+			return true;
+		end
+
+		local c = item:byte(i);
+		-- A subnet whose prefix ends part-way through this byte is stored
+		-- under its own byte value, so compare only the bits the prefix
+		-- covers. This runs even when an exact child exists, because a
+		-- shorter sibling prefix may still cover the address.
+		for child_byte, child_node in pairs(node) do
+			if type(child_byte) == "number" and child_node.terminal then
+				local bits = child_node.value;
+				if type(bits) == "table" then
+					for j = #bits, 1, -1 do
+						local b = bits[j]-((i-1)*8);
+						if b < 8 then
+							-- Mask selecting the top `b` bits of the byte
+							local mask = bit.bnot(2^(8-b)-1);
+							if bit.band(bit.bxor(c, child_byte), mask) == 0 then
+								return true;
+							end
+						end
+					end
+				end
+			end
+		end
+
+		local child = node[c];
+		if not child then
+			return false;
+		end
+		node = child;
+	end
+	-- Every byte matched; listed only if an entry ends exactly here
+	return node.terminal == true;
+end
+
+-- Create a new, empty trie.
+local function new()
+	return setmetatable({
+		root = new_node();
+	}, trie_mt);
+end
+
+-- Returns true if `o` was created by new().
+local function is_trie(o)
+	return getmetatable(o) == trie_mt;
+end
+
+return {
+	new = new;
+	is_trie = is_trie;
+};