You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@spamassassin.apache.org by bu...@bugzilla.spamassassin.org on 2014/10/29 09:59:02 UTC

[Bug 5652] bayes_seen - auto expire

https://issues.apache.org/SpamAssassin/show_bug.cgi?id=5652

Tomasz Ostrowski <to...@batory.org.pl> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |tometzky@batory.org.pl

--- Comment #14 from Tomasz Ostrowski <to...@batory.org.pl> ---
I've created a workaround on my system, which uses a PostgreSQL database for
Bayes. It needs neither modifications to SpamAssassin nor additional cron
jobs.

I add an additional column `ctime` to the `bayes_seen` table with default
`now()`, and create a trigger on insert which runs with probability 1/10000 and
deletes rows with `ctime` older than a year and a half.

-- Stamp every bayes_seen row with its insertion time; the default is now().
alter table bayes_seen
        add column ctime timestamptz not null default now();
create or replace function bayes_seen_expire() returns trigger as $$
        -- Trigger function for bayes_seen: on a small random fraction of
        -- calls, deletes every row whose ctime is more than 1.5 years
        -- before today's date. Always returns NULL.
        declare
                -- Probability that a given call does any work.
                expire_chance float := 0.0001;
                -- Rows with ctime earlier than this are removed.
                cutoff timestamptz;
                -- Number of rows locked for removal.
                stale_rows integer;
        begin
                -- Most calls leave here without touching the table.
                if random() >= expire_chance then
                        return NULL;
                end if;

                cutoff := current_date - interval '1.5 year';

                -- Take the row locks in a fixed (id, msgid) order before
                -- deleting. Another bayes_seen_expire() may be running
                -- concurrently, and locking the same rows in differing
                -- orders can cause a deadlock.
                select count(*) into stale_rows
                from (
                        select id, msgid
                        from bayes_seen
                        where ctime < cutoff
                        order by id, msgid
                        for update
                ) as locked;

                -- Only issue the delete when something was actually locked.
                if stale_rows > 0 then
                        delete from bayes_seen where ctime < cutoff;
                end if;

                return NULL;
        end;
$$ language plpgsql;
-- Drop any earlier copy first so that re-running these statements does not
-- fail with "trigger already exists" (the function itself is installed with
-- create or replace and is already re-runnable).
drop trigger if exists bayes_seen_expire on bayes_seen;
-- Statement-level trigger: fires once after each insert statement on
-- bayes_seen; bayes_seen_expire() then decides at random whether to purge
-- old rows.
create trigger bayes_seen_expire after insert on bayes_seen
        for each statement
        execute procedure bayes_seen_expire();

-- 
You are receiving this mail because:
You are the assignee for the bug.