mod_storage_internal: Lazy-load archive items while iterating
Very large list files previously ran into limits of the Lua parser, or
just caused Prosody to freeze while parsing.
Using the new index we can parse individual items one at a time. This
probably won't reduce overall CPU usage, probably the opposite, but it
will reduce the number of items in memory at once and allow collection
of items after we iterated past them.
local cache = require "prosody.util.cache";
local datamanager = require "prosody.core.storagemanager".olddm;
local array = require "prosody.util.array";
local datetime = require "prosody.util.datetime";
local st = require "prosody.util.stanza";
local now = require "prosody.util.time".now;
local id = require "prosody.util.id".medium;
local jid_join = require "prosody.util.jid".join;
local set = require "prosody.util.set";
local it = require "prosody.util.iterators";
local host = module.host;
local archive_item_limit = module:get_option_number("storage_archive_item_limit", 10000);
local archive_item_count_cache = cache.new(module:get_option("storage_archive_item_limit_cache_size", 1000));
local driver = {};
function driver:open(store, typ)
local mt = self[typ or "keyval"]
if not mt then
return nil, "unsupported-store";
end
return setmetatable({ store = store, type = typ }, mt);
end
function driver:stores(username) -- luacheck: ignore 212/self
return datamanager.stores(username, host);
end
function driver:purge(user) -- luacheck: ignore 212/self
return datamanager.purge(user, host);
end
local keyval = { };
driver.keyval = { __index = keyval };
function keyval:get(user)
return datamanager.load(user, host, self.store);
end
function keyval:set(user, data)
return datamanager.store(user, host, self.store, data);
end
function keyval:users()
return datamanager.users(host, self.store, self.type);
end
local archive = {};
driver.archive = { __index = archive };
archive.caps = {
total = true;
quota = archive_item_limit;
truncate = true;
full_id_range = true;
ids = true;
};
function archive:append(username, key, value, when, with)
when = when or now();
if not st.is_stanza(value) then
return nil, "unsupported-datatype";
end
value = st.preserialize(st.clone(value));
value.when = when;
value.with = with;
value.attr.stamp = datetime.datetime(when);
local cache_key = jid_join(username, host, self.store);
local item_count = archive_item_count_cache:get(cache_key);
if key then
local items, err = datamanager.list_load(username, host, self.store);
if not items and err then return items, err; end
-- Check the quota
item_count = items and #items or 0;
archive_item_count_cache:set(cache_key, item_count);
if item_count >= archive_item_limit then
module:log("debug", "%s reached or over quota, not adding to store", username);
return nil, "quota-limit";
end
if items then
-- Filter out any item with the same key as the one being added
items = array(items);
items:filter(function (item)
return item.key ~= key;
end);
value.key = key;
items:push(value);
local ok, err = datamanager.list_store(username, host, self.store, items);
if not ok then return ok, err; end
archive_item_count_cache:set(cache_key, #items);
return key;
end
else
if not item_count then -- Item count not cached?
-- We need to load the list to get the number of items currently stored
local items, err = datamanager.list_load(username, host, self.store);
if not items and err then return items, err; end
item_count = items and #items or 0;
archive_item_count_cache:set(cache_key, item_count);
end
if item_count >= archive_item_limit then
module:log("debug", "%s reached or over quota, not adding to store", username);
return nil, "quota-limit";
end
key = id();
end
module:log("debug", "%s has %d items out of %d limit in store %s", username, item_count, archive_item_limit, self.store);
value.key = key;
local ok, err = datamanager.list_append(username, host, self.store, value);
if not ok then return ok, err; end
archive_item_count_cache:set(cache_key, item_count+1);
return key;
end
function archive:find(username, query)
local list, err = datamanager.list_open(username, host, self.store);
if not list then
if err then
return list, err;
elseif query then
if query.before or query.after then
return nil, "item-not-found";
end
if query.total then
return function()
end, 0;
end
end
return function()
end;
end
local i = 0;
local iter = function()
i = i + 1;
return list[i]
end
if query then
if query.reverse then
i = #list + 1
iter = function()
i = i - 1
return list[i]
end
end
if query.key then
iter = it.filter(function(item)
return item.key == query.key;
end, iter);
end
if query.ids then
local ids = set.new(query.ids);
iter = it.filter(function(item)
return ids:contains(item.key);
end, iter);
end
if query.with then
iter = it.filter(function(item)
return item.with == query.with;
end, iter);
end
if query.start then
iter = it.filter(function(item)
local when = item.when or datetime.parse(item.attr.stamp);
return when >= query.start;
end, iter);
end
if query["end"] then
iter = it.filter(function(item)
local when = item.when or datetime.parse(item.attr.stamp);
return when <= query["end"];
end, iter);
end
if query.after then
local found = false;
iter = it.filter(function(item)
local found_after = found;
if item.key == query.after then
found = true
end
return found_after;
end, iter);
end
if query.before then
local found = false;
iter = it.filter(function(item)
if item.key == query.before then
found = true
end
return not found;
end, iter);
end
if query.limit then
iter = it.head(query.limit, iter);
end
end
return function()
local item = iter();
if item == nil then
return
end
local key = item.key;
local when = item.when or item.attr and datetime.parse(item.attr.stamp);
local with = item.with;
item.key, item.when, item.with = nil, nil, nil;
item.attr.stamp = nil;
-- COMPAT Stored data may still contain legacy XEP-0091 timestamp
item.attr.stamp_legacy = nil;
item = st.deserialize(item);
return key, item, when, with;
end
end
function archive:get(username, wanted_key)
local iter, err = self:find(username, { key = wanted_key })
if not iter then return iter, err; end
for key, stanza, when, with in iter do
if key == wanted_key then
return stanza, when, with;
end
end
return nil, "item-not-found";
end
function archive:set(username, key, new_value, new_when, new_with)
local items, err = datamanager.list_load(username, host, self.store);
if not items then
if err then
return items, err;
else
return nil, "item-not-found";
end
end
for i = 1, #items do
local old_item = items[i];
if old_item.key == key then
local item = st.preserialize(st.clone(new_value));
local when = new_when or old_item.when or datetime.parse(old_item.attr.stamp);
item.key = key;
item.when = when;
item.with = new_with or old_item.with;
item.attr.stamp = datetime.datetime(when);
items[i] = item;
return datamanager.list_store(username, host, self.store, items);
end
end
return nil, "item-not-found";
end
function archive:dates(username)
local items, err = datamanager.list_load(username, host, self.store);
if not items then return items, err; end
return array(items):pluck("when"):map(datetime.date):unique();
end
function archive:summary(username, query)
local iter, err = self:find(username, query)
if not iter then return iter, err; end
local counts = {};
local earliest = {};
local latest = {};
local body = {};
for _, stanza, when, with in iter do
counts[with] = (counts[with] or 0) + 1;
if earliest[with] == nil then
earliest[with] = when;
end
latest[with] = when;
body[with] = stanza:get_child_text("body") or body[with];
end
return {
counts = counts;
earliest = earliest;
latest = latest;
body = body;
};
end
function archive:users()
return datamanager.users(host, self.store, "list");
end
function archive:delete(username, query)
local cache_key = jid_join(username, host, self.store);
if not query or next(query) == nil then
archive_item_count_cache:set(cache_key, nil);
return datamanager.list_store(username, host, self.store, nil);
end
local items, err = datamanager.list_load(username, host, self.store);
if not items then
if err then
return items, err;
end
archive_item_count_cache:set(cache_key, 0);
-- Store is empty
return 0;
end
items = array(items);
local count_before = #items;
if query then
if query.key then
items:filter(function (item)
return item.key ~= query.key;
end);
end
if query.with then
items:filter(function (item)
return item.with ~= query.with;
end);
end
if query.start then
items:filter(function (item)
return item.when < query.start;
end);
end
if query["end"] then
items:filter(function (item)
return item.when > query["end"];
end);
end
if query.truncate and #items > query.truncate then
if query.reverse then
-- Before: { 1, 2, 3, 4, 5, }
-- After: { 1, 2, 3 }
for i = #items, query.truncate + 1, -1 do
items[i] = nil;
end
else
-- Before: { 1, 2, 3, 4, 5, }
-- After: { 3, 4, 5 }
local offset = #items - query.truncate;
for i = 1, #items do
items[i] = items[i+offset];
end
end
end
end
local count = count_before - #items;
if count == 0 then
return 0; -- No changes, skip write
end
local ok, err = datamanager.list_store(username, host, self.store, items);
if not ok then return ok, err; end
archive_item_count_cache:set(cache_key, #items);
return count;
end
module:provides("storage", driver);