You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2018/10/29 10:29:15 UTC

svn commit: r1845096 - in /spamassassin: branches/3.4/lib/Mail/ branches/3.4/lib/Mail/SpamAssassin/ branches/3.4/lib/Mail/SpamAssassin/Plugin/ trunk/lib/Mail/ trunk/lib/Mail/SpamAssassin/ trunk/lib/Mail/SpamAssassin/Plugin/

Author: hege
Date: Mon Oct 29 10:29:15 2018
New Revision: 1845096

URL: http://svn.apache.org/viewvc?rev=1845096&view=rev
Log:
Make RegistryBoundaries actually use 20_aux_tlds.cf by initializing it only after the configuration has been parsed. Fix plugins (FreeMail, HashBL) to compile their valid_tlds_re-based regexes at finish_parsing_end. Remove the old hardcoded TLD lists from Conf.pm; TLD data is now supplied only via sa-update.

Modified:
    spamassassin/branches/3.4/lib/Mail/SpamAssassin.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm
    spamassassin/trunk/lib/Mail/SpamAssassin.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin.pm Mon Oct 29 10:29:15 2018
@@ -429,7 +429,6 @@ sub new {
   }
 
   $self->{conf} ||= new Mail::SpamAssassin::Conf ($self);
-  $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
   $self->{plugins} = Mail::SpamAssassin::PluginHandler->new ($self);
 
   $self->{save_pattern_hits} ||= 0;
@@ -2120,14 +2119,22 @@ sub have_plugin {
 
 sub call_plugins {
   my $self = shift;
+  my $subname = shift;
 
   # We could potentially get called after a finish(), so just return.
   return unless $self->{plugins};
 
+  # Use some calls ourself too
+  if ($subname eq 'finish_parsing_end') {
+    # Initialize RegistryBoundaries, now that util_rb_tld etc from config is
+    # read.  Plugins can also now use {valid_tlds_re} to one time compile
+    # regexes in finish_parsing_end.
+    $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
+  }
+
   # safety net in case some plugin changes global settings, Bug 6218
   local $/ = $/;  # prevent underlying modules from changing the global $/
 
-  my $subname = shift;
   return $self->{plugins}->callback($subname, @_);
 }
 

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm Mon Oct 29 10:29:15 2018
@@ -3540,67 +3540,6 @@ TLDs include things like com, net, org,
 
 =cut
 
-  # DO NOT UPDATE THIS HARDCODED LIST!! It is only as fallback for
-  # transitional period and to be removed later.  TLDs are now maintained in
-  # sa-update 20_aux_tlds.cf.
-  foreach (qw/
-    ac academy accountants active actor ad ae aero af ag agency ai airforce al am an
-    ao aq ar archi army arpa as asia associates at attorney au auction audio autos
-    aw ax axa az ba bar bargains bayern bb bd be beer berlin best bf bg bh bi bid
-    bike bio biz bj black blackfriday blue bm bmw bn bnpparibas bo boo boutique br
-    brussels bs bt build builders business buzz bv bw by bz bzh ca cab camera camp
-    cancerresearch capetown capital caravan cards care career careers cash cat
-    catering cc cd center ceo cern cf cg ch cheap christmas church ci citic city ck
-    cl claims cleaning click clinic clothing club cm cn co codes coffee college
-    cologne com community company computer condos construction consulting
-    contractors cooking cool coop country cr credit creditcard cruises cu cuisinella
-    cv cw cx cy cymru cz dad dance dating day de deals degree democrat dental
-    dentist desi diamonds diet digital direct directory discount dj dk dm dnp do
-    domains durban dz eat ec edu education ee eg email engineer engineering
-    enterprises equipment er es esq estate et eu eus events exchange expert exposed
-    fail farm feedback fi finance financial fish fishing fitness fj fk flights
-    florist fm fo foo foundation fr frl frogans fund furniture futbol ga gal gallery
-    gb gbiz gd ge gent gf gg gh gi gift gifts gives gl glass global globo gm gmail
-    gmo gn gop gov gp gq gr graphics gratis green gripe gs gt gu guide guitars guru
-    gw gy hamburg haus healthcare help here hiphop hiv hk hm hn holdings holiday
-    homes horse host hosting house how hr ht hu id ie il im immo immobilien in
-    industries info ing ink institute insure int international investments io iq ir
-    is it je jetzt jm jo jobs joburg jp juegos kaufen ke kg kh ki kim kitchen kiwi
-    km kn koeln kp kr krd kred kw ky kz la lacaixa land lawyer lb lc lease lgbt li
-    life lighting limited limo link lk loans london lotto lr ls lt ltda lu luxe
-    luxury lv ly ma maison management mango market marketing mc md me media meet
-    melbourne meme menu mg mh miami mil mini mk ml mm mn mo mobi moda moe monash
-    mortgage moscow motorcycles mov mp mq mr ms mt mu museum mv mw mx my mz na
-    nagoya name navy nc ne net network neustar new nf ng ngo nhk ni ninja nl no np
-    nr nra nrw nu nyc nz okinawa om ong onl ooo org organic otsuka ovh pa paris
-    partners parts pe pf pg ph photo photography photos physio pics pictures pink
-    pizza pk pl place plumbing pm pn post pr praxi press pro prod productions
-    properties property ps pt pub pw py qa qpon quebec re realtor recipes red rehab
-    reise reisen ren rentals repair report republican rest restaurant reviews rich
-    rio ro rocks rodeo rs rsvp ru ruhr rw ryukyu sa saarland sarl sb sc sca scb
-    schmidt schule scot sd se services sexy sg sh shiksha shoes si singles sj sk sl
-    sm sn so social software sohu solar solutions soy space spiegel sr st su
-    supplies supply support surf surgery suzuki sv sx sy systems sz tatar tattoo tax
-    tc td technology tel tf tg th tienda tips tirol tj tk tl tm tn to today tokyo
-    tools top town toys tr trade training travel tt tv tw tz ua ug uk university
-    uno uol us uy uz va vacations vc ve vegas ventures versicherung vet vg vi viajes
-    villas vision vlaanderen vn vodka vote voting voto voyage vu wales wang watch
-    webcam website wed wf whoswho wien wiki williamhill works ws wtc wtf xn--1qqw23a
-    xn--3bst00m xn--3ds443g xn--3e0b707e xn--45brj9c xn--4gbrim xn--55qw42g
-    xn--55qx5d xn--6frz82g xn--6qq986b3xl xn--80adxhks xn--80ao21a xn--80asehdb
-    xn--80aswg xn--90a3ac xn--c1avg xn--cg4bki xn--clchc0ea0b2g2a9gcd xn--czr694b
-    xn--czru2d xn--d1acj3b xn--fiq228c5hs xn--fiq64b xn--fiqs8s xn--fiqz9s
-    xn--fpcrj9c3d xn--fzc2c9e2c xn--gecrj9c xn--h2brj9c xn--i1b6b1a6a2e xn--io0a7i
-    xn--j1amh xn--j6w193g xn--kprw13d xn--kpry57d xn--kput3i xn--l1acc
-    xn--lgbbat1ad8j xn--mgb9awbf xn--mgba3a4f16a xn--mgbaam7a8h xn--mgbab2bd
-    xn--mgbayh7gpa xn--mgbbh1a71e xn--mgbc0a9azcg xn--mgberp4a5d4ar xn--mgbx4cd0ab
-    xn--ngbc5azd xn--nqv7f xn--nqv7fs00ema xn--o3cw4h xn--ogbpf8fl xn--p1ai
-    xn--pgbs0dh xn--q9jyb4c xn--rhqv96g xn--s9brj9c xn--ses554g xn--unup4y xn--vhquv
-    xn--wgbh1c xn--wgbl6a xn--xhq521b xn--xkc2al3hye2a xn--xkc2dl3a5ee0h
-    xn--yfro4i67o xn--ygbi2ammx xn--zfr164b xxx xyz yachts yandex ye yokohama
-    youtube yt za zm zone zw
-    /) { $self->{valid_tlds}{lc $_} = 1; }
-
   push (@cmds, {
     setting => 'util_rb_tld',
     is_admin => 1,
@@ -3615,7 +3554,6 @@ TLDs include things like com, net, org,
       foreach (split(/\s+/, $value)) {
         $self->{valid_tlds}{lc $_} = 1;
       }
-      dbg("config: added tld list - $value");
     }
   });
 
@@ -3626,175 +3564,6 @@ code.  2TLDs include things like co.uk,
 
 =cut
 
-  # DO NOT UPDATE THIS HARDCODED LIST!! It is only as fallback for
-  # transitional period and to be removed later.  TLDs are now maintained in
-  # sa-update 20_aux_tlds.cf.
-  foreach (qw/
-    com.ac edu.ac gov.ac mil.ac net.ac org.ac nom.ad ac.ae co.ae com.ae gov.ae
-    mil.ae name.ae net.ae org.ae pro.ae sch.ae com.af edu.af gov.af net.af
-    co.ag com.ag net.ag nom.ag org.ag com.ai edu.ai gov.ai net.ai off.ai
-    org.ai com.al edu.al gov.al net.al org.al com.an edu.an net.an org.an
-    co.ao ed.ao gv.ao it.ao og.ao pb.ao com.ar edu.ar gov.ar int.ar mil.ar
-    net.ar org.ar e164.arpa in-addr.arpa ip6.arpa iris.arpa uri.arpa urn.arpa
-    ac.at co.at gv.at or.at priv.at act.au asn.au com.au conf.au csiro.au
-    edu.au gov.au id.au info.au net.au nsw.au nt.au org.au otc.au oz.au qld.au
-    sa.au tas.au telememo.au vic.au wa.au com.aw biz.az com.az edu.az gov.az
-    info.az int.az mil.az name.az net.az org.az pp.az co.ba com.ba edu.ba
-    gov.ba mil.ba net.ba org.ba rs.ba unbi.ba unsa.ba com.bb edu.bb gov.bb
-    net.bb org.bb ac.bd com.bd edu.bd gov.bd mil.bd net.bd org.bd ac.be
-    belgie.be dns.be fgov.be gov.bf biz.bh cc.bh com.bh edu.bh gov.bh info.bh
-    net.bh org.bh com.bm edu.bm gov.bm net.bm org.bm com.bn edu.bn net.bn
-    org.bn com.bo edu.bo gob.bo gov.bo int.bo mil.bo net.bo org.bo tv.bo
-    adm.br adv.br agr.br am.br arq.br art.br ato.br bio.br bmd.br cim.br
-    cng.br cnt.br com.br coop.br dpn.br eco.br ecn.br edu.br eng.br esp.br
-    etc.br eti.br far.br fm.br fnd.br fot.br fst.br g12.br ggf.br gov.br
-    imb.br ind.br inf.br jor.br lel.br mat.br med.br mil.br mus.br net.br
-    nom.br not.br ntr.br odo.br org.br ppg.br pro.br psc.br psi.br qsl.br
-    rec.br slg.br srv.br tmp.br trd.br tur.br tv.br vet.br zlg.br com.bs
-    net.bs org.bs com.bt edu.bt gov.bt net.bt org.bt co.bw org.bw gov.by
-    mil.by com.bz net.bz org.bz ab.ca bc.ca gc.ca mb.ca nb.ca nf.ca nl.ca
-    ns.ca nt.ca nu.ca on.ca pe.ca qc.ca sk.ca yk.ca co.ck edu.ck gov.ck net.ck
-    org.ck ac.cn ah.cn bj.cn com.cn cq.cn edu.cn fj.cn gd.cn gov.cn gs.cn
-    gx.cn gz.cn ha.cn hb.cn he.cn hi.cn hk.cn hl.cn hn.cn jl.cn js.cn jx.cn
-    ln.cn mo.cn net.cn nm.cn nx.cn org.cn qh.cn sc.cn sd.cn sh.cn sn.cn sx.cn
-    tj.cn tw.cn xj.cn xz.cn yn.cn zj.cn arts.co com.co edu.co firm.co gov.co
-    info.co int.co mil.co net.co nom.co org.co rec.co web.co lkd.co.im
-    ltd.co.im plc.co.im co.cm com.cm net.cm au.com br.com cn.com de.com eu.com
-    gb.com hu.com no.com qc.com ru.com sa.com se.com uk.com us.com uy.com
-    za.com ac.cr co.cr ed.cr fi.cr go.cr or.cr sa.cr com.cu edu.cu gov.cu
-    inf.cu net.cu org.cu gov.cx ac.cy biz.cy com.cy ekloges.cy gov.cy ltd.cy
-    name.cy net.cy org.cy parliament.cy press.cy pro.cy tm.cy co.dk com.dm
-    edu.dm gov.dm net.dm org.dm art.do com.do edu.do gob.do gov.do mil.do
-    net.do org.do sld.do web.do art.dz asso.dz com.dz edu.dz gov.dz net.dz
-    org.dz pol.dz com.ec edu.ec fin.ec gov.ec info.ec k12.ec med.ec mil.ec
-    net.ec org.ec pro.ec gob.ec co.ee com.ee edu.ee fie.ee med.ee org.ee
-    pri.ee com.eg edu.eg eun.eg gov.eg mil.eg net.eg org.eg sci.eg com.er
-    edu.er gov.er ind.er mil.er net.er org.er com.es edu.es gob.es nom.es
-    org.es biz.et com.et edu.et gov.et info.et name.et net.et org.et aland.fi
-    ac.fj biz.fj com.fj gov.fj id.fj info.fj mil.fj name.fj net.fj org.fj
-    pro.fj school.fj ac.fk co.fk com.fk gov.fk net.fk nom.fk org.fk tm.fr
-    asso.fr nom.fr prd.fr presse.fr com.fr gouv.fr com.ge edu.ge gov.ge mil.ge
-    net.ge org.ge pvt.ge ac.gg alderney.gg co.gg gov.gg guernsey.gg ind.gg
-    ltd.gg net.gg org.gg sark.gg sch.gg com.gh edu.gh gov.gh mil.gh org.gh
-    com.gi edu.gi gov.gi ltd.gi mod.gi org.gi ac.gn com.gn gov.gn net.gn
-    org.gn asso.gp com.gp edu.gp net.gp org.gp com.gr edu.gr gov.gr net.gr
-    org.gr com.gt edu.gt gob.gt ind.gt mil.gt net.gt org.gt com.gu edu.gu
-    gov.gu mil.gu net.gu org.gu com.hk edu.hk gov.hk idv.hk net.hk org.hk
-    com.hn edu.hn gob.hn mil.hn net.hn org.hn com.hr from.hr iz.hr name.hr
-    adult.ht art.ht asso.ht com.ht coop.ht edu.ht firm.ht gouv.ht info.ht
-    med.ht net.ht org.ht perso.ht pol.ht pro.ht rel.ht shop.ht 2000.hu
-    agrar.hu bolt.hu casino.hu city.hu co.hu erotica.hu erotika.hu film.hu
-    forum.hu games.hu hotel.hu info.hu ingatlan.hu jogasz.hu konyvelo.hu
-    lakas.hu media.hu news.hu org.hu priv.hu reklam.hu sex.hu shop.hu sport.hu
-    suli.hu szex.hu tm.hu tozsde.hu utazas.hu video.hu ac.id co.id go.id
-    mil.id net.id or.id sch.id web.id gov.ie ac.il co.il gov.il idf.il k12.il
-    muni.il net.il org.il ac.im co.im gov.im net.im nic.im org.im ac.in co.in
-    edu.in ernet.in firm.in gen.in gov.in ind.in mil.in net.in nic.in org.in
-    res.in com.io gov.io mil.io net.io org.io ac.ir co.ir gov.ir id.ir net.ir
-    org.ir sch.ir edu.it gov.it ac.je co.je gov.je ind.je jersey.je ltd.je
-    net.je org.je sch.je com.jm edu.jm gov.jm net.jm org.jm com.jo edu.jo
-    gov.jo mil.jo net.jo org.jo ac.jp ad.jp aichi.jp akita.jp aomori.jp
-    chiba.jp co.jp ed.jp ehime.jp fukui.jp fukuoka.jp fukushima.jp gifu.jp
-    go.jp gov.jp gr.jp gunma.jp hiroshima.jp hokkaido.jp hyogo.jp ibaraki.jp
-    ishikawa.jp iwate.jp kagawa.jp kagoshima.jp kanagawa.jp kanazawa.jp
-    kawasaki.jp kitakyushu.jp kobe.jp kochi.jp kumamoto.jp kyoto.jp lg.jp
-    matsuyama.jp mie.jp miyagi.jp miyazaki.jp nagano.jp nagasaki.jp nagoya.jp
-    nara.jp ne.jp net.jp niigata.jp oita.jp okayama.jp okinawa.jp or.jp org.jp
-    osaka.jp saga.jp saitama.jp sapporo.jp sendai.jp shiga.jp shimane.jp
-    shizuoka.jp takamatsu.jp tochigi.jp tokushima.jp tokyo.jp tottori.jp
-    toyama.jp utsunomiya.jp wakayama.jp yamagata.jp yamaguchi.jp yamanashi.jp
-    yokohama.jp ac.ke co.ke go.ke ne.ke new.ke or.ke sc.ke com.kg edu.kg
-    gov.kg mil.kg net.kg org.kg com.kh edu.kh gov.kh mil.kh net.kh org.kh
-    per.kh ac.kr busan.kr chungbuk.kr chungnam.kr co.kr daegu.kr daejeon.kr
-    es.kr gangwon.kr go.kr gwangju.kr gyeongbuk.kr gyeonggi.kr gyeongnam.kr
-    hs.kr incheon.kr jeju.kr jeonbuk.kr jeonnam.kr kg.kr kyonggi.kr mil.kr
-    ms.kr ne.kr or.kr pe.kr re.kr sc.kr seoul.kr ulsan.kr com.kw edu.kw gov.kw
-    mil.kw net.kw org.kw com.ky edu.ky gov.ky net.ky org.ky com.kz edu.kz
-    gov.kz mil.kz net.kz org.kz com.la net.la org.la com.lb edu.lb gov.lb
-    mil.lb net.lb org.lb com.lc edu.lc gov.lc net.lc org.lc assn.lk com.lk
-    edu.lk gov.lk grp.lk hotel.lk int.lk ltd.lk net.lk ngo.lk org.lk sch.lk
-    soc.lk web.lk com.lr edu.lr gov.lr net.lr org.lr co.ls org.ls gov.lt
-    mil.lt asn.lv com.lv conf.lv edu.lv gov.lv id.lv mil.lv net.lv org.lv
-    biz.ly com.ly edu.ly gov.ly id.ly med.ly net.ly org.ly plc.ly sch.ly ac.ma
-    co.ma gov.ma net.ma org.ma press.ma asso.mc tm.mc ac.me co.me edu.me
-    gov.me its.me net.me org.me priv.me com.mg edu.mg gov.mg mil.mg nom.mg
-    org.mg prd.mg tm.mg army.mil navy.mil com.mk org.mk com.mm edu.mm gov.mm
-    net.mm org.mm edu.mn gov.mn org.mn com.mo edu.mo gov.mo net.mo org.mo
-    music.mobi weather.mobi co.mp edu.mp gov.mp net.mp org.mp com.mt edu.mt
-    gov.mt net.mt org.mt tm.mt uu.mt co.mu com.mu aero.mv biz.mv com.mv
-    coop.mv edu.mv gov.mv info.mv int.mv mil.mv museum.mv name.mv net.mv
-    org.mv pro.mv ac.mw co.mw com.mw coop.mw edu.mw gov.mw int.mw museum.mw
-    net.mw org.mw com.mx edu.mx gob.mx net.mx org.mx com.my edu.my gov.my
-    mil.my name.my net.my org.my alt.na com.na cul.na edu.na net.na org.na
-    telecom.na unam.na com.nc net.nc org.nc de.net gb.net uk.net ac.ng com.ng
-    edu.ng gov.ng net.ng org.ng sch.ng ac.ni biz.ni com.ni edu.ni gob.ni in.ni
-    info.ni int.ni mil.ni net.ni nom.ni org.ni web.ni fhs.no folkebibl.no
-    fylkesbibl.no herad.no idrett.no kommune.no mil.no museum.no priv.no
-    stat.no tel.no vgs.no com.np edu.np gov.np mil.np net.np org.np biz.nr
-    co.nr com.nr edu.nr fax.nr gov.nr info.nr mob.nr mobil.nr mobile.nr net.nr
-    org.nr tel.nr tlf.nr ac.nz co.nz cri.nz geek.nz gen.nz govt.nz iwi.nz
-    maori.nz mil.nz net.nz org.nz school.nz ac.om biz.om co.om com.om edu.om
-    gov.om med.om mil.om mod.om museum.om net.om org.om pro.om sch.om dk.org
-    eu.org abo.pa ac.pa com.pa edu.pa gob.pa ing.pa med.pa net.pa nom.pa
-    org.pa sld.pa com.pe edu.pe gob.pe mil.pe net.pe nom.pe org.pe com.pf
-    edu.pf org.pf ac.pg com.pg net.pg com.ph edu.ph gov.ph mil.ph net.ph
-    ngo.ph org.ph biz.pk com.pk edu.pk fam.pk gob.pk gok.pk gon.pk gop.pk
-    gos.pk gov.pk net.pk org.pk web.pk art.pl biz.pl com.pl edu.pl gov.pl
-    info.pl mil.pl net.pl ngo.pl org.pl biz.pr com.pr edu.pr gov.pr info.pr
-    isla.pr name.pr net.pr org.pr pro.pr cpa.pro law.pro med.pro com.ps edu.ps
-    gov.ps net.ps org.ps plo.ps sec.ps com.pt edu.pt gov.pt int.pt net.pt
-    nome.pt org.pt publ.pt com.py edu.py gov.py net.py org.py com.qa edu.qa
-    gov.qa net.qa org.qa asso.re com.re nom.re arts.ro com.ro firm.ro info.ro
-    nom.ro nt.ro org.ro rec.ro store.ro tm.ro www.ro ac.rs co.rs edu.rs gov.rs
-    in.rs org.rs ac.ru com.ru edu.ru gov.ru int.ru mil.ru net.ru org.ru pp.ru
-    ac.rw co.rw com.rw edu.rw gouv.rw gov.rw int.rw mil.rw net.rw com.sa
-    edu.sa gov.sa med.sa net.sa org.sa pub.sa sch.sa com.sb edu.sb gov.sb
-    net.sb org.sb com.sc edu.sc gov.sc net.sc org.sc com.sd edu.sd gov.sd
-    info.sd med.sd net.sd org.sd sch.sd tv.sd ab.se ac.se bd.se brand.se c.se
-    d.se e.se f.se fh.se fhsk.se fhv.se g.se h.se i.se k.se komforb.se
-    kommunalforbund.se komvux.se lanarb.se lanbib.se m.se mil.se n.se
-    naturbruksgymn.se o.se org.se parti.se pp.se press.se s.se sshn.se t.se
-    tm.se u.se w.se x.se y.se z.se com.sg edu.sg gov.sg idn.sg net.sg org.sg
-    per.sg com.sh edu.sh gov.sh mil.sh net.sh org.sh edu.sk gov.sk mil.sk
-    co.st com.st consulado.st edu.st embaixada.st gov.st mil.st net.st org.st
-    principe.st saotome.st store.st com.sv edu.sv gob.sv org.sv red.sv com.sy
-    gov.sy net.sy org.sy at.tf bg.tf ca.tf ch.tf cz.tf de.tf edu.tf eu.tf
-    int.tf net.tf pl.tf ru.tf sg.tf us.tf ac.th co.th go.th in.th mi.th net.th
-    or.th ac.tj biz.tj co.tj com.tj edu.tj go.tj gov.tj int.tj mil.tj name.tj
-    net.tj org.tj web.tj com.tn edunet.tn ens.tn fin.tn gov.tn ind.tn info.tn
-    intl.tn nat.tn net.tn org.tn rnrt.tn rns.tn rnu.tn tourism.tn gov.to
-    av.tr bbs.tr bel.tr biz.tr com.tr dr.tr edu.tr gen.tr gov.tr
-    info.tr k12.tr mil.tr name.tr net.tr org.tr pol.tr tel.tr web.tr aero.tt
-    at.tt au.tt be.tt biz.tt ca.tt co.tt com.tt coop.tt de.tt dk.tt edu.tt
-    es.tt eu.tt fr.tt gov.tt info.tt int.tt it.tt jobs.tt mobi.tt museum.tt
-    name.tt net.tt nic.tt org.tt pro.tt se.tt travel.tt uk.tt us.tt co.tv
-    gov.tv club.tw com.tw ebiz.tw edu.tw game.tw gov.tw idv.tw mil.tw net.tw
-    org.tw ac.tz co.tz go.tz ne.tz or.tz cherkassy.ua chernigov.ua
-    chernovtsy.ua ck.ua cn.ua co.ua com.ua crimea.ua cv.ua dn.ua
-    dnepropetrovsk.ua donetsk.ua dp.ua edu.ua gov.ua if.ua in.ua
-    ivano-frankivsk.ua kh.ua kharkov.ua kherson.ua khmelnitskiy.ua kiev.ua
-    kirovograd.ua km.ua kr.ua ks.ua kv.ua lg.ua lugansk.ua lutsk.ua lviv.ua
-    mk.ua net.ua nikolaev.ua od.ua odessa.ua org.ua pl.ua poltava.ua rovno.ua
-    rv.ua sebastopol.ua sumy.ua te.ua ternopil.ua uzhgorod.ua vinnica.ua vn.ua
-    zaporizhzhe.ua zhitomir.ua zp.ua zt.ua ac.ug co.ug go.ug ne.ug or.ug sc.ug
-    ac.uk bl.uk british-library.uk co.uk edu.uk gov.uk icnet.uk jet.uk ltd.uk
-    me.uk mod.uk national-library-scotland.uk net.uk nhs.uk nic.uk nls.uk
-    org.uk parliament.uk plc.uk police.uk sch.uk ak.us al.us ar.us az.us ca.us
-    co.us ct.us dc.us de.us dni.us fed.us fl.us ga.us hi.us ia.us id.us il.us
-    in.us isa.us kids.us ks.us ky.us la.us ma.us md.us me.us mi.us mn.us mo.us
-    ms.us mt.us nc.us nd.us ne.us nh.us nj.us nm.us nsn.us nv.us ny.us oh.us
-    ok.us or.us pa.us ri.us sc.us sd.us tn.us tx.us ut.us va.us vt.us wa.us
-    wi.us wv.us wy.us com.uy edu.uy gub.uy mil.uy net.uy org.uy vatican.va
-    arts.ve bib.ve co.ve com.ve edu.ve firm.ve gov.ve info.ve int.ve mil.ve
-    net.ve nom.ve org.ve rec.ve store.ve tec.ve web.ve co.vi com.vi edu.vi
-    gov.vi net.vi org.vi ac.vn biz.vn com.vn edu.vn gov.vn health.vn info.vn
-    int.vn name.vn net.vn org.vn pro.vn ch.vu com.vu de.vu edu.vu fr.vu net.vu
-    org.vu com.ws edu.ws gov.ws net.ws org.ws com.ye edu.ye gov.ye mil.ye
-    net.ye org.ye ac.za alt.za bourse.za city.za co.za edu.za gov.za law.za
-    mil.za net.za ngo.za nom.za org.za school.za tm.za web.za ac.zm co.zm
-    com.zm edu.zm gov.zm org.zm sch.zm ac.zw co.zw gov.zw org.zw
-    /) { $self->{two_level_domains}{lc $_} = 1; }
-
   push (@cmds, {
     setting => 'util_rb_2tld',
     is_admin => 1,
@@ -3819,13 +3588,6 @@ code.  3TLDs include things like demon.c
 
 =cut
 
-  # DO NOT UPDATE THIS HARDCODED LIST!! It is only as fallback for
-  # transitional period and to be removed later.  TLDs are now maintained in
-  # sa-update 20_aux_tlds.cf.
-  foreach (qw/
-    demon.co.uk esc.edu.ar lkd.co.im plc.co.im
-    /) { $self->{three_level_domains}{lc $_} = 1; }
-
   push (@cmds, {
     setting => 'util_rb_3tld',
     is_admin => 1,

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm Mon Oct 29 10:29:15 2018
@@ -155,7 +155,14 @@ sub new {
     $self->register_eval_rule("check_freemail_header");
     $self->register_eval_rule("check_freemail_body");
 
-    # Need to init the regex here, utilizing registryboundaries->valid_tlds_re
+    return $self;
+}
+
+sub _init_email_regex {
+    my ($self) = @_;
+
+    dbg("initializing email regex");
+
     # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
     # full email regex v0.02
     $self->{email_regex} = qr/
@@ -168,10 +175,7 @@ sub new {
       (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
       $self->{main}->{registryboundaries}->{valid_tlds_re}	# ends with valid tld
       )
-      (?!(?:[a-z0-9-]|\.[a-z0-9]))		# make sure domain ends here
     /xi;
-
-    return $self;
 }
 
 sub set_config {
@@ -276,7 +280,8 @@ sub finish_parsing_end {
         my $doms = join('|', @domains);
         $self->{freemail_domains_re} = qr/\@(?:${doms})$/;
         $wcount = scalar @domains;
-        undef %{$self->{freemail_temp_wc}};
+        undef $self->{freemail_temp_wc};
+        delete $self->{freemail_temp_wc};
     }
 
     my $count = scalar keys %{$self->{freemail_domains}};
@@ -293,6 +298,12 @@ sub finish_parsing_end {
         $self->{freemail_available} = 0;
     }
 
+    # valid_tlds_re will be available at finish_parsing_end, compile it now,
+    # we only need to do it once and before possible forking
+    if ($self->{freemail_available} && !$self->{email_regex}) {
+        $self->_init_email_regex();
+    }
+
     return 0;
 }
 

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm Mon Oct 29 10:29:15 2018
@@ -77,7 +77,26 @@ sub new {
     $self->set_config($mailsa->{conf});
     $self->register_eval_rule("check_hashbl_emails");
 
-    # Need to init the regex here, utilizing registryboundaries->valid_tlds_re
+    return $self;
+}
+
+sub finish_parsing_end {
+    my ($self, $opts) = @_;
+
+    # valid_tlds_re will be available at finish_parsing_end, compile it now,
+    # we only need to do it once and before possible forking
+    if ($self->{hashbl_available} && !$self->{email_regex}) {
+      $self->_init_email_regex();
+    }
+
+    return 0;
+}
+
+sub _init_email_regex {
+    my ($self) = @_;
+
+    dbg("initializing email regex");
+
     # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
     # full email regex v0.02
     $self->{email_regex} = qr/
@@ -90,10 +109,7 @@ sub new {
       (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
       $self->{main}->{registryboundaries}->{valid_tlds_re}	# ends with valid tld
       )
-      (?!(?:[a-z0-9-]|\.[a-z0-9]))		# make sure domain ends here
     /xi;
-
-    return $self;
 }
 
 sub set_config {

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm Mon Oct 29 10:29:15 2018
@@ -32,6 +32,8 @@ use re 'taint';
 
 our @ISA = qw();
 
+use Mail::SpamAssassin::Logger;
+
 # called from SpamAssassin->init() to create $self->{util_rb}
 sub new {
   my $class = shift;
@@ -45,14 +47,25 @@ sub new {
   bless ($self, $class);
 
   # Initialize valid_tlds_re for schemeless uri parsing, FreeMail etc
-  if ($self->{conf}->{valid_tlds}) {
-    my $tlds = join('|', keys %{$self->{conf}->{valid_tlds}});
+  if ($self->{conf}->{valid_tlds} && %{$self->{conf}->{valid_tlds}}) {
+    # International domain names are already in ASCII-compatible encoding (ACE)
+    my $tlds = 
+      '(?<![a-zA-Z0-9-])'. # make sure tld starts at boundary
+      join('|', keys %{$self->{conf}->{valid_tlds}}).
+      '(?!(?:[a-zA-Z0-9-]|\.[a-zA-Z0-9]))'; # make sure it ends
     # Perl 5.10+ trie optimizes lists, no need for fancy regex optimizing
-    $self->{valid_tlds_re} = qr/(?:$tlds)/i;
+    if (eval { $self->{valid_tlds_re} = qr/$tlds/; 1; }) {
+      dbg("config: registryboundaries: %d tlds loaded",
+        scalar keys %{$self->{conf}->{valid_tlds}});
+    } else {
+      warn "config: registryboundaries: failed to compile valid_tlds_re: $@\n";
+      $self->{valid_tlds_re} = qr/no_tlds_defined/;
+    }
   }
   else {
     # Failsafe in case no tlds defined, we don't want this to match everything..
     $self->{valid_tlds_re} = qr/no_tlds_defined/;
+    warn "config: registryboundaries: no tlds defined, need to run sa-update\n";
   }
 
   $self;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin.pm Mon Oct 29 10:29:15 2018
@@ -428,7 +428,6 @@ sub new {
   }
 
   $self->{conf} ||= new Mail::SpamAssassin::Conf ($self);
-  $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
   $self->{plugins} = Mail::SpamAssassin::PluginHandler->new ($self);
 
   $self->{save_pattern_hits} ||= 0;
@@ -2195,6 +2194,11 @@ sub call_plugins {
   if ($self->{spamd} && $subname eq 'spamd_child_after_non_root') {
     # set global dir now if spamd
     $self->set_global_state_dir();
+  } elsif ($subname eq 'finish_parsing_end') {
+    # Initialize RegistryBoundaries, now that util_rb_tld etc from config is
+    # read.  Plugins can also now use {valid_tlds_re} to one time compile
+    # regexes in finish_parsing_end.
+    $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
   }
 
   # safety net in case some plugin changes global settings, Bug 6218

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Mon Oct 29 10:29:15 2018
@@ -3620,67 +3620,6 @@ e.g. рф, ελ.
 
 =cut
 
-  # DO NOT UPDATE THIS HARDCODED LIST!! It is only as fallback for
-  # transitional period and to be removed later.  TLDs are now maintained in
-  # sa-update 20_aux_tlds.cf.
-  foreach (qw/
-    ac academy accountants active actor ad ae aero af ag agency ai airforce al am an
-    ao aq ar archi army arpa as asia associates at attorney au auction audio autos
-    aw ax axa az ba bar bargains bayern bb bd be beer berlin best bf bg bh bi bid
-    bike bio biz bj black blackfriday blue bm bmw bn bnpparibas bo boo boutique br
-    brussels bs bt build builders business buzz bv bw by bz bzh ca cab camera camp
-    cancerresearch capetown capital caravan cards care career careers cash cat
-    catering cc cd center ceo cern cf cg ch cheap christmas church ci citic city ck
-    cl claims cleaning click clinic clothing club cm cn co codes coffee college
-    cologne com community company computer condos construction consulting
-    contractors cooking cool coop country cr credit creditcard cruises cu cuisinella
-    cv cw cx cy cymru cz dad dance dating day de deals degree democrat dental
-    dentist desi diamonds diet digital direct directory discount dj dk dm dnp do
-    domains durban dz eat ec edu education ee eg email engineer engineering
-    enterprises equipment er es esq estate et eu eus events exchange expert exposed
-    fail farm feedback fi finance financial fish fishing fitness fj fk flights
-    florist fm fo foo foundation fr frl frogans fund furniture futbol ga gal gallery
-    gb gbiz gd ge gent gf gg gh gi gift gifts gives gl glass global globo gm gmail
-    gmo gn gop gov gp gq gr graphics gratis green gripe gs gt gu guide guitars guru
-    gw gy hamburg haus healthcare help here hiphop hiv hk hm hn holdings holiday
-    homes horse host hosting house how hr ht hu id ie il im immo immobilien in
-    industries info ing ink institute insure int international investments io iq ir
-    is it je jetzt jm jo jobs joburg jp juegos kaufen ke kg kh ki kim kitchen kiwi
-    km kn koeln kp kr krd kred kw ky kz la lacaixa land lawyer lb lc lease lgbt li
-    life lighting limited limo link lk loans london lotto lr ls lt ltda lu luxe
-    luxury lv ly ma maison management mango market marketing mc md me media meet
-    melbourne meme menu mg mh miami mil mini mk ml mm mn mo mobi moda moe monash
-    mortgage moscow motorcycles mov mp mq mr ms mt mu museum mv mw mx my mz na
-    nagoya name navy nc ne net network neustar new nf ng ngo nhk ni ninja nl no np
-    nr nra nrw nu nyc nz okinawa om ong onl ooo org organic otsuka ovh pa paris
-    partners parts pe pf pg ph photo photography photos physio pics pictures pink
-    pizza pk pl place plumbing pm pn post pr praxi press pro prod productions
-    properties property ps pt pub pw py qa qpon quebec re realtor recipes red rehab
-    reise reisen ren rentals repair report republican rest restaurant reviews rich
-    rio ro rocks rodeo rs rsvp ru ruhr rw ryukyu sa saarland sarl sb sc sca scb
-    schmidt schule scot sd se services sexy sg sh shiksha shoes si singles sj sk sl
-    sm sn so social software sohu solar solutions soy space spiegel sr st su
-    supplies supply support surf surgery suzuki sv sx sy systems sz tatar tattoo tax
-    tc td technology tel tf tg th tienda tips tirol tj tk tl tm tn to today tokyo
-    tools top town toys tr trade training travel tt tv tw tz ua ug uk university
-    uno uol us uy uz va vacations vc ve vegas ventures versicherung vet vg vi viajes
-    villas vision vlaanderen vn vodka vote voting voto voyage vu wales wang watch
-    webcam website wed wf whoswho wien wiki williamhill works ws wtc wtf xn--1qqw23a
-    xn--3bst00m xn--3ds443g xn--3e0b707e xn--45brj9c xn--4gbrim xn--55qw42g
-    xn--55qx5d xn--6frz82g xn--6qq986b3xl xn--80adxhks xn--80ao21a xn--80asehdb
-    xn--80aswg xn--90a3ac xn--c1avg xn--cg4bki xn--clchc0ea0b2g2a9gcd xn--czr694b
-    xn--czru2d xn--d1acj3b xn--fiq228c5hs xn--fiq64b xn--fiqs8s xn--fiqz9s
-    xn--fpcrj9c3d xn--fzc2c9e2c xn--gecrj9c xn--h2brj9c xn--i1b6b1a6a2e xn--io0a7i
-    xn--j1amh xn--j6w193g xn--kprw13d xn--kpry57d xn--kput3i xn--l1acc
-    xn--lgbbat1ad8j xn--mgb9awbf xn--mgba3a4f16a xn--mgbaam7a8h xn--mgbab2bd
-    xn--mgbayh7gpa xn--mgbbh1a71e xn--mgbc0a9azcg xn--mgberp4a5d4ar xn--mgbx4cd0ab
-    xn--ngbc5azd xn--nqv7f xn--nqv7fs00ema xn--o3cw4h xn--ogbpf8fl xn--p1ai
-    xn--pgbs0dh xn--q9jyb4c xn--rhqv96g xn--s9brj9c xn--ses554g xn--unup4y xn--vhquv
-    xn--wgbh1c xn--wgbl6a xn--xhq521b xn--xkc2al3hye2a xn--xkc2dl3a5ee0h
-    xn--yfro4i67o xn--ygbi2ammx xn--zfr164b xxx xyz yachts yandex ye yokohama
-    youtube yt za zm zone zw
-    /) { $self->{valid_tlds}{idn_to_ascii($_)} = 1 }
-
   push (@cmds, {
     setting => 'util_rb_tld',
     is_admin => 1,
@@ -3695,7 +3634,6 @@ e.g. рф, ελ.
       foreach (split(/\s+/, $value)) {
         $self->{valid_tlds}{idn_to_ascii($_)} = 1;
       }
-      dbg("config: added tld list - $value");
     }
   });
 
@@ -3708,175 +3646,6 @@ labels encoded as UTF-8 octets.
 
 =cut
 
-  # DO NOT UPDATE THIS HARDCODED LIST!! It is only as fallback for
-  # transitional period and to be removed later.  TLDs are now maintained in
-  # sa-update 20_aux_tlds.cf.
-  foreach (qw/
-    com.ac edu.ac gov.ac mil.ac net.ac org.ac nom.ad ac.ae co.ae com.ae gov.ae
-    mil.ae name.ae net.ae org.ae pro.ae sch.ae com.af edu.af gov.af net.af
-    co.ag com.ag net.ag nom.ag org.ag com.ai edu.ai gov.ai net.ai off.ai
-    org.ai com.al edu.al gov.al net.al org.al com.an edu.an net.an org.an
-    co.ao ed.ao gv.ao it.ao og.ao pb.ao com.ar edu.ar gov.ar int.ar mil.ar
-    net.ar org.ar e164.arpa in-addr.arpa ip6.arpa iris.arpa uri.arpa urn.arpa
-    ac.at co.at gv.at or.at priv.at act.au asn.au com.au conf.au csiro.au
-    edu.au gov.au id.au info.au net.au nsw.au nt.au org.au otc.au oz.au qld.au
-    sa.au tas.au telememo.au vic.au wa.au com.aw biz.az com.az edu.az gov.az
-    info.az int.az mil.az name.az net.az org.az pp.az co.ba com.ba edu.ba
-    gov.ba mil.ba net.ba org.ba rs.ba unbi.ba unsa.ba com.bb edu.bb gov.bb
-    net.bb org.bb ac.bd com.bd edu.bd gov.bd mil.bd net.bd org.bd ac.be
-    belgie.be dns.be fgov.be gov.bf biz.bh cc.bh com.bh edu.bh gov.bh info.bh
-    net.bh org.bh com.bm edu.bm gov.bm net.bm org.bm com.bn edu.bn net.bn
-    org.bn com.bo edu.bo gob.bo gov.bo int.bo mil.bo net.bo org.bo tv.bo
-    adm.br adv.br agr.br am.br arq.br art.br ato.br bio.br bmd.br cim.br
-    cng.br cnt.br com.br coop.br dpn.br eco.br ecn.br edu.br eng.br esp.br
-    etc.br eti.br far.br fm.br fnd.br fot.br fst.br g12.br ggf.br gov.br
-    imb.br ind.br inf.br jor.br lel.br mat.br med.br mil.br mus.br net.br
-    nom.br not.br ntr.br odo.br org.br ppg.br pro.br psc.br psi.br qsl.br
-    rec.br slg.br srv.br tmp.br trd.br tur.br tv.br vet.br zlg.br com.bs
-    net.bs org.bs com.bt edu.bt gov.bt net.bt org.bt co.bw org.bw gov.by
-    mil.by com.bz net.bz org.bz ab.ca bc.ca gc.ca mb.ca nb.ca nf.ca nl.ca
-    ns.ca nt.ca nu.ca on.ca pe.ca qc.ca sk.ca yk.ca co.ck edu.ck gov.ck net.ck
-    org.ck ac.cn ah.cn bj.cn com.cn cq.cn edu.cn fj.cn gd.cn gov.cn gs.cn
-    gx.cn gz.cn ha.cn hb.cn he.cn hi.cn hk.cn hl.cn hn.cn jl.cn js.cn jx.cn
-    ln.cn mo.cn net.cn nm.cn nx.cn org.cn qh.cn sc.cn sd.cn sh.cn sn.cn sx.cn
-    tj.cn tw.cn xj.cn xz.cn yn.cn zj.cn arts.co com.co edu.co firm.co gov.co
-    info.co int.co mil.co net.co nom.co org.co rec.co web.co lkd.co.im
-    ltd.co.im plc.co.im co.cm com.cm net.cm au.com br.com cn.com de.com eu.com
-    gb.com hu.com no.com qc.com ru.com sa.com se.com uk.com us.com uy.com
-    za.com ac.cr co.cr ed.cr fi.cr go.cr or.cr sa.cr com.cu edu.cu gov.cu
-    inf.cu net.cu org.cu gov.cx ac.cy biz.cy com.cy ekloges.cy gov.cy ltd.cy
-    name.cy net.cy org.cy parliament.cy press.cy pro.cy tm.cy co.dk com.dm
-    edu.dm gov.dm net.dm org.dm art.do com.do edu.do gob.do gov.do mil.do
-    net.do org.do sld.do web.do art.dz asso.dz com.dz edu.dz gov.dz net.dz
-    org.dz pol.dz com.ec edu.ec fin.ec gov.ec info.ec k12.ec med.ec mil.ec
-    net.ec org.ec pro.ec gob.ec co.ee com.ee edu.ee fie.ee med.ee org.ee
-    pri.ee com.eg edu.eg eun.eg gov.eg mil.eg net.eg org.eg sci.eg com.er
-    edu.er gov.er ind.er mil.er net.er org.er com.es edu.es gob.es nom.es
-    org.es biz.et com.et edu.et gov.et info.et name.et net.et org.et aland.fi
-    ac.fj biz.fj com.fj gov.fj id.fj info.fj mil.fj name.fj net.fj org.fj
-    pro.fj school.fj ac.fk co.fk com.fk gov.fk net.fk nom.fk org.fk tm.fr
-    asso.fr nom.fr prd.fr presse.fr com.fr gouv.fr com.ge edu.ge gov.ge mil.ge
-    net.ge org.ge pvt.ge ac.gg alderney.gg co.gg gov.gg guernsey.gg ind.gg
-    ltd.gg net.gg org.gg sark.gg sch.gg com.gh edu.gh gov.gh mil.gh org.gh
-    com.gi edu.gi gov.gi ltd.gi mod.gi org.gi ac.gn com.gn gov.gn net.gn
-    org.gn asso.gp com.gp edu.gp net.gp org.gp com.gr edu.gr gov.gr net.gr
-    org.gr com.gt edu.gt gob.gt ind.gt mil.gt net.gt org.gt com.gu edu.gu
-    gov.gu mil.gu net.gu org.gu com.hk edu.hk gov.hk idv.hk net.hk org.hk
-    com.hn edu.hn gob.hn mil.hn net.hn org.hn com.hr from.hr iz.hr name.hr
-    adult.ht art.ht asso.ht com.ht coop.ht edu.ht firm.ht gouv.ht info.ht
-    med.ht net.ht org.ht perso.ht pol.ht pro.ht rel.ht shop.ht 2000.hu
-    agrar.hu bolt.hu casino.hu city.hu co.hu erotica.hu erotika.hu film.hu
-    forum.hu games.hu hotel.hu info.hu ingatlan.hu jogasz.hu konyvelo.hu
-    lakas.hu media.hu news.hu org.hu priv.hu reklam.hu sex.hu shop.hu sport.hu
-    suli.hu szex.hu tm.hu tozsde.hu utazas.hu video.hu ac.id co.id go.id
-    mil.id net.id or.id sch.id web.id gov.ie ac.il co.il gov.il idf.il k12.il
-    muni.il net.il org.il ac.im co.im gov.im net.im nic.im org.im ac.in co.in
-    edu.in ernet.in firm.in gen.in gov.in ind.in mil.in net.in nic.in org.in
-    res.in com.io gov.io mil.io net.io org.io ac.ir co.ir gov.ir id.ir net.ir
-    org.ir sch.ir edu.it gov.it ac.je co.je gov.je ind.je jersey.je ltd.je
-    net.je org.je sch.je com.jm edu.jm gov.jm net.jm org.jm com.jo edu.jo
-    gov.jo mil.jo net.jo org.jo ac.jp ad.jp aichi.jp akita.jp aomori.jp
-    chiba.jp co.jp ed.jp ehime.jp fukui.jp fukuoka.jp fukushima.jp gifu.jp
-    go.jp gov.jp gr.jp gunma.jp hiroshima.jp hokkaido.jp hyogo.jp ibaraki.jp
-    ishikawa.jp iwate.jp kagawa.jp kagoshima.jp kanagawa.jp kanazawa.jp
-    kawasaki.jp kitakyushu.jp kobe.jp kochi.jp kumamoto.jp kyoto.jp lg.jp
-    matsuyama.jp mie.jp miyagi.jp miyazaki.jp nagano.jp nagasaki.jp nagoya.jp
-    nara.jp ne.jp net.jp niigata.jp oita.jp okayama.jp okinawa.jp or.jp org.jp
-    osaka.jp saga.jp saitama.jp sapporo.jp sendai.jp shiga.jp shimane.jp
-    shizuoka.jp takamatsu.jp tochigi.jp tokushima.jp tokyo.jp tottori.jp
-    toyama.jp utsunomiya.jp wakayama.jp yamagata.jp yamaguchi.jp yamanashi.jp
-    yokohama.jp ac.ke co.ke go.ke ne.ke new.ke or.ke sc.ke com.kg edu.kg
-    gov.kg mil.kg net.kg org.kg com.kh edu.kh gov.kh mil.kh net.kh org.kh
-    per.kh ac.kr busan.kr chungbuk.kr chungnam.kr co.kr daegu.kr daejeon.kr
-    es.kr gangwon.kr go.kr gwangju.kr gyeongbuk.kr gyeonggi.kr gyeongnam.kr
-    hs.kr incheon.kr jeju.kr jeonbuk.kr jeonnam.kr kg.kr kyonggi.kr mil.kr
-    ms.kr ne.kr or.kr pe.kr re.kr sc.kr seoul.kr ulsan.kr com.kw edu.kw gov.kw
-    mil.kw net.kw org.kw com.ky edu.ky gov.ky net.ky org.ky com.kz edu.kz
-    gov.kz mil.kz net.kz org.kz com.la net.la org.la com.lb edu.lb gov.lb
-    mil.lb net.lb org.lb com.lc edu.lc gov.lc net.lc org.lc assn.lk com.lk
-    edu.lk gov.lk grp.lk hotel.lk int.lk ltd.lk net.lk ngo.lk org.lk sch.lk
-    soc.lk web.lk com.lr edu.lr gov.lr net.lr org.lr co.ls org.ls gov.lt
-    mil.lt asn.lv com.lv conf.lv edu.lv gov.lv id.lv mil.lv net.lv org.lv
-    biz.ly com.ly edu.ly gov.ly id.ly med.ly net.ly org.ly plc.ly sch.ly ac.ma
-    co.ma gov.ma net.ma org.ma press.ma asso.mc tm.mc ac.me co.me edu.me
-    gov.me its.me net.me org.me priv.me com.mg edu.mg gov.mg mil.mg nom.mg
-    org.mg prd.mg tm.mg army.mil navy.mil com.mk org.mk com.mm edu.mm gov.mm
-    net.mm org.mm edu.mn gov.mn org.mn com.mo edu.mo gov.mo net.mo org.mo
-    music.mobi weather.mobi co.mp edu.mp gov.mp net.mp org.mp com.mt edu.mt
-    gov.mt net.mt org.mt tm.mt uu.mt co.mu com.mu aero.mv biz.mv com.mv
-    coop.mv edu.mv gov.mv info.mv int.mv mil.mv museum.mv name.mv net.mv
-    org.mv pro.mv ac.mw co.mw com.mw coop.mw edu.mw gov.mw int.mw museum.mw
-    net.mw org.mw com.mx edu.mx gob.mx net.mx org.mx com.my edu.my gov.my
-    mil.my name.my net.my org.my alt.na com.na cul.na edu.na net.na org.na
-    telecom.na unam.na com.nc net.nc org.nc de.net gb.net uk.net ac.ng com.ng
-    edu.ng gov.ng net.ng org.ng sch.ng ac.ni biz.ni com.ni edu.ni gob.ni in.ni
-    info.ni int.ni mil.ni net.ni nom.ni org.ni web.ni fhs.no folkebibl.no
-    fylkesbibl.no herad.no idrett.no kommune.no mil.no museum.no priv.no
-    stat.no tel.no vgs.no com.np edu.np gov.np mil.np net.np org.np biz.nr
-    co.nr com.nr edu.nr fax.nr gov.nr info.nr mob.nr mobil.nr mobile.nr net.nr
-    org.nr tel.nr tlf.nr ac.nz co.nz cri.nz geek.nz gen.nz govt.nz iwi.nz
-    maori.nz mil.nz net.nz org.nz school.nz ac.om biz.om co.om com.om edu.om
-    gov.om med.om mil.om mod.om museum.om net.om org.om pro.om sch.om dk.org
-    eu.org abo.pa ac.pa com.pa edu.pa gob.pa ing.pa med.pa net.pa nom.pa
-    org.pa sld.pa com.pe edu.pe gob.pe mil.pe net.pe nom.pe org.pe com.pf
-    edu.pf org.pf ac.pg com.pg net.pg com.ph edu.ph gov.ph mil.ph net.ph
-    ngo.ph org.ph biz.pk com.pk edu.pk fam.pk gob.pk gok.pk gon.pk gop.pk
-    gos.pk gov.pk net.pk org.pk web.pk art.pl biz.pl com.pl edu.pl gov.pl
-    info.pl mil.pl net.pl ngo.pl org.pl biz.pr com.pr edu.pr gov.pr info.pr
-    isla.pr name.pr net.pr org.pr pro.pr cpa.pro law.pro med.pro com.ps edu.ps
-    gov.ps net.ps org.ps plo.ps sec.ps com.pt edu.pt gov.pt int.pt net.pt
-    nome.pt org.pt publ.pt com.py edu.py gov.py net.py org.py com.qa edu.qa
-    gov.qa net.qa org.qa asso.re com.re nom.re arts.ro com.ro firm.ro info.ro
-    nom.ro nt.ro org.ro rec.ro store.ro tm.ro www.ro ac.rs co.rs edu.rs gov.rs
-    in.rs org.rs ac.ru com.ru edu.ru gov.ru int.ru mil.ru net.ru org.ru pp.ru
-    ac.rw co.rw com.rw edu.rw gouv.rw gov.rw int.rw mil.rw net.rw com.sa
-    edu.sa gov.sa med.sa net.sa org.sa pub.sa sch.sa com.sb edu.sb gov.sb
-    net.sb org.sb com.sc edu.sc gov.sc net.sc org.sc com.sd edu.sd gov.sd
-    info.sd med.sd net.sd org.sd sch.sd tv.sd ab.se ac.se bd.se brand.se c.se
-    d.se e.se f.se fh.se fhsk.se fhv.se g.se h.se i.se k.se komforb.se
-    kommunalforbund.se komvux.se lanarb.se lanbib.se m.se mil.se n.se
-    naturbruksgymn.se o.se org.se parti.se pp.se press.se s.se sshn.se t.se
-    tm.se u.se w.se x.se y.se z.se com.sg edu.sg gov.sg idn.sg net.sg org.sg
-    per.sg com.sh edu.sh gov.sh mil.sh net.sh org.sh edu.sk gov.sk mil.sk
-    co.st com.st consulado.st edu.st embaixada.st gov.st mil.st net.st org.st
-    principe.st saotome.st store.st com.sv edu.sv gob.sv org.sv red.sv com.sy
-    gov.sy net.sy org.sy at.tf bg.tf ca.tf ch.tf cz.tf de.tf edu.tf eu.tf
-    int.tf net.tf pl.tf ru.tf sg.tf us.tf ac.th co.th go.th in.th mi.th net.th
-    or.th ac.tj biz.tj co.tj com.tj edu.tj go.tj gov.tj int.tj mil.tj name.tj
-    net.tj org.tj web.tj com.tn edunet.tn ens.tn fin.tn gov.tn ind.tn info.tn
-    intl.tn nat.tn net.tn org.tn rnrt.tn rns.tn rnu.tn tourism.tn gov.to
-    av.tr bbs.tr bel.tr biz.tr com.tr dr.tr edu.tr gen.tr gov.tr
-    info.tr k12.tr mil.tr name.tr net.tr org.tr pol.tr tel.tr web.tr aero.tt
-    at.tt au.tt be.tt biz.tt ca.tt co.tt com.tt coop.tt de.tt dk.tt edu.tt
-    es.tt eu.tt fr.tt gov.tt info.tt int.tt it.tt jobs.tt mobi.tt museum.tt
-    name.tt net.tt nic.tt org.tt pro.tt se.tt travel.tt uk.tt us.tt co.tv
-    gov.tv club.tw com.tw ebiz.tw edu.tw game.tw gov.tw idv.tw mil.tw net.tw
-    org.tw ac.tz co.tz go.tz ne.tz or.tz cherkassy.ua chernigov.ua
-    chernovtsy.ua ck.ua cn.ua co.ua com.ua crimea.ua cv.ua dn.ua
-    dnepropetrovsk.ua donetsk.ua dp.ua edu.ua gov.ua if.ua in.ua
-    ivano-frankivsk.ua kh.ua kharkov.ua kherson.ua khmelnitskiy.ua kiev.ua
-    kirovograd.ua km.ua kr.ua ks.ua kv.ua lg.ua lugansk.ua lutsk.ua lviv.ua
-    mk.ua net.ua nikolaev.ua od.ua odessa.ua org.ua pl.ua poltava.ua rovno.ua
-    rv.ua sebastopol.ua sumy.ua te.ua ternopil.ua uzhgorod.ua vinnica.ua vn.ua
-    zaporizhzhe.ua zhitomir.ua zp.ua zt.ua ac.ug co.ug go.ug ne.ug or.ug sc.ug
-    ac.uk bl.uk british-library.uk co.uk edu.uk gov.uk icnet.uk jet.uk ltd.uk
-    me.uk mod.uk national-library-scotland.uk net.uk nhs.uk nic.uk nls.uk
-    org.uk parliament.uk plc.uk police.uk sch.uk ak.us al.us ar.us az.us ca.us
-    co.us ct.us dc.us de.us dni.us fed.us fl.us ga.us hi.us ia.us id.us il.us
-    in.us isa.us kids.us ks.us ky.us la.us ma.us md.us me.us mi.us mn.us mo.us
-    ms.us mt.us nc.us nd.us ne.us nh.us nj.us nm.us nsn.us nv.us ny.us oh.us
-    ok.us or.us pa.us ri.us sc.us sd.us tn.us tx.us ut.us va.us vt.us wa.us
-    wi.us wv.us wy.us com.uy edu.uy gub.uy mil.uy net.uy org.uy vatican.va
-    arts.ve bib.ve co.ve com.ve edu.ve firm.ve gov.ve info.ve int.ve mil.ve
-    net.ve nom.ve org.ve rec.ve store.ve tec.ve web.ve co.vi com.vi edu.vi
-    gov.vi net.vi org.vi ac.vn biz.vn com.vn edu.vn gov.vn health.vn info.vn
-    int.vn name.vn net.vn org.vn pro.vn ch.vu com.vu de.vu edu.vu fr.vu net.vu
-    org.vu com.ws edu.ws gov.ws net.ws org.ws com.ye edu.ye gov.ye mil.ye
-    net.ye org.ye ac.za alt.za bourse.za city.za co.za edu.za gov.za law.za
-    mil.za net.za ngo.za nom.za org.za school.za tm.za web.za ac.zm co.zm
-    com.zm edu.zm gov.zm org.zm sch.zm ac.zw co.zw gov.zw org.zw
-    /) { $self->{two_level_domains}{idn_to_ascii($_)} = 1 }
-
   push (@cmds, {
     setting => 'util_rb_2tld',
     is_admin => 1,
@@ -3903,13 +3672,6 @@ Unicode labels encoded as UTF-8 octets.
 
 =cut
 
-  # DO NOT UPDATE THIS HARDCODED LIST!! It is only as fallback for
-  # transitional period and to be removed later.  TLDs are now maintained in
-  # sa-update 20_aux_tlds.cf.
-  foreach (qw/
-    demon.co.uk esc.edu.ar lkd.co.im plc.co.im
-    /) { $self->{three_level_domains}{idn_to_ascii($_)} = 1 }
-
   push (@cmds, {
     setting => 'util_rb_3tld',
     is_admin => 1,

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm Mon Oct 29 10:29:15 2018
@@ -155,7 +155,14 @@ sub new {
     $self->register_eval_rule("check_freemail_header");
     $self->register_eval_rule("check_freemail_body");
 
-    # Need to init the regex here, utilizing registryboundaries->valid_tlds_re
+    return $self;
+}
+
+sub _init_email_regex {
+    my ($self) = @_;
+
+    dbg("initializing email regex");
+
     # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
     # full email regex v0.02
     $self->{email_regex} = qr/
@@ -168,10 +175,7 @@ sub new {
       (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
       $self->{main}->{registryboundaries}->{valid_tlds_re}	# ends with valid tld
       )
-      (?!(?:[a-z0-9-]|\.[a-z0-9]))		# make sure domain ends here
     /xi;
-
-    return $self;
 }
 
 sub set_config {
@@ -276,7 +280,8 @@ sub finish_parsing_end {
         my $doms = join('|', @domains);
         $self->{freemail_domains_re} = qr/\@(?:${doms})$/;
         $wcount = scalar @domains;
-        undef %{$self->{freemail_temp_wc}};
+        undef $self->{freemail_temp_wc};
+        delete $self->{freemail_temp_wc};
     }
 
     my $count = scalar keys %{$self->{freemail_domains}};
@@ -293,6 +298,12 @@ sub finish_parsing_end {
         $self->{freemail_available} = 0;
     }
 
+    # valid_tlds_re will be available at finish_parsing_end, compile it now,
+    # we only need to do it once and before possible forking
+    if ($self->{freemail_available} && !$self->{email_regex}) {
+        $self->_init_email_regex();
+    }
+
     return 0;
 }
 

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm Mon Oct 29 10:29:15 2018
@@ -78,7 +78,26 @@ sub new {
     $self->set_config($mailsa->{conf});
     $self->register_eval_rule("check_hashbl_emails");
 
-    # Need to init the regex here, utilizing registryboundaries->valid_tlds_re
+    return $self;
+}
+
+sub finish_parsing_end {
+    my ($self, $opts) = @_;
+
+    # valid_tlds_re will be available at finish_parsing_end, compile it now,
+    # we only need to do it once and before possible forking
+    if ($self->{hashbl_available} && !$self->{email_regex}) {
+      $self->_init_email_regex();
+    }
+
+    return 0;
+}
+
+sub _init_email_regex {
+    my ($self) = @_;
+
+    dbg("initializing email regex");
+
     # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
     # full email regex v0.02
     $self->{email_regex} = qr/
@@ -91,10 +110,7 @@ sub new {
       (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
       $self->{main}->{registryboundaries}->{valid_tlds_re}	# ends with valid tld
       )
-      (?!(?:[a-z0-9-]|\.[a-z0-9]))		# make sure domain ends here
     /xi;
-
-    return $self;
 }
 
 sub set_config {

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm Mon Oct 29 10:29:15 2018
@@ -53,13 +53,23 @@ sub new {
   # Initialize valid_tlds_re for schemeless uri parsing, FreeMail etc
   if ($self->{conf}->{valid_tlds} && %{$self->{conf}->{valid_tlds}}) {
     # International domain names are already in ASCII-compatible encoding (ACE)
-    my $tlds = join('|', keys %{$self->{conf}->{valid_tlds}});
+    my $tlds = 
+      '(?<![a-zA-Z0-9-])'. # make sure tld starts at boundary
+      join('|', keys %{$self->{conf}->{valid_tlds}}).
+      '(?!(?:[a-zA-Z0-9-]|\.[a-zA-Z0-9]))'; # make sure it ends
     # Perl 5.10+ trie optimizes lists, no need for fancy regex optimizing
-    $self->{valid_tlds_re} = qr/(?:$tlds)/i;
+    if (eval { $self->{valid_tlds_re} = qr/$tlds/; 1; }) {
+      dbg("config: registryboundaries: %d tlds loaded",
+        scalar keys %{$self->{conf}->{valid_tlds}});
+    } else {
+      warn "config: registryboundaries: failed to compile valid_tlds_re: $@\n";
+      $self->{valid_tlds_re} = qr/no_tlds_defined/;
+    }
   }
   else {
     # Failsafe in case no tlds defined, we don't want this to match everything..
     $self->{valid_tlds_re} = qr/no_tlds_defined/;
+    warn "config: registryboundaries: no tlds defined, need to run sa-update\n";
   }
 
   $self;