You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2018/10/29 10:29:15 UTC
svn commit: r1845096 - in /spamassassin: branches/3.4/lib/Mail/
branches/3.4/lib/Mail/SpamAssassin/
branches/3.4/lib/Mail/SpamAssassin/Plugin/ trunk/lib/Mail/
trunk/lib/Mail/SpamAssassin/ trunk/lib/Mail/SpamAssassin/Plugin/
Author: hege
Date: Mon Oct 29 10:29:15 2018
New Revision: 1845096
URL: http://svn.apache.org/viewvc?rev=1845096&view=rev
Log:
Make RegistryBoundaries actually use 20_aux_tlds.cf by initializing it only after the configuration has been parsed. Fix plugins to handle valid_tlds_re at finish_parsing_end. Remove the old hardcoded TLD lists; the lists are now supplied only via sa-update.
Modified:
spamassassin/branches/3.4/lib/Mail/SpamAssassin.pm
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm
spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm
spamassassin/trunk/lib/Mail/SpamAssassin.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin.pm Mon Oct 29 10:29:15 2018
@@ -429,7 +429,6 @@ sub new {
}
$self->{conf} ||= new Mail::SpamAssassin::Conf ($self);
- $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
$self->{plugins} = Mail::SpamAssassin::PluginHandler->new ($self);
$self->{save_pattern_hits} ||= 0;
@@ -2120,14 +2119,22 @@ sub have_plugin {
sub call_plugins {
my $self = shift;
+ my $subname = shift;
# We could potentially get called after a finish(), so just return.
return unless $self->{plugins};
+ # Use some calls ourself too
+ if ($subname eq 'finish_parsing_end') {
+ # Initialize RegistryBoundaries, now that util_rb_tld etc from config is
+ # read. Plugins can also now use {valid_tlds_re} to one time compile
+ # regexes in finish_parsing_end.
+ $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
+ }
+
# safety net in case some plugin changes global settings, Bug 6218
local $/ = $/; # prevent underlying modules from changing the global $/
- my $subname = shift;
return $self->{plugins}->callback($subname, @_);
}
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm Mon Oct 29 10:29:15 2018
@@ -3540,67 +3540,6 @@ TLDs include things like com, net, org,
=cut
- # DO NOT UPDATE THIS HARDCODED LIST!! It is only as fallback for
- # transitional period and to be removed later. TLDs are now maintained in
- # sa-update 20_aux_tlds.cf.
- foreach (qw/
- ac academy accountants active actor ad ae aero af ag agency ai airforce al am an
- ao aq ar archi army arpa as asia associates at attorney au auction audio autos
- aw ax axa az ba bar bargains bayern bb bd be beer berlin best bf bg bh bi bid
- bike bio biz bj black blackfriday blue bm bmw bn bnpparibas bo boo boutique br
- brussels bs bt build builders business buzz bv bw by bz bzh ca cab camera camp
- cancerresearch capetown capital caravan cards care career careers cash cat
- catering cc cd center ceo cern cf cg ch cheap christmas church ci citic city ck
- cl claims cleaning click clinic clothing club cm cn co codes coffee college
- cologne com community company computer condos construction consulting
- contractors cooking cool coop country cr credit creditcard cruises cu cuisinella
- cv cw cx cy cymru cz dad dance dating day de deals degree democrat dental
- dentist desi diamonds diet digital direct directory discount dj dk dm dnp do
- domains durban dz eat ec edu education ee eg email engineer engineering
- enterprises equipment er es esq estate et eu eus events exchange expert exposed
- fail farm feedback fi finance financial fish fishing fitness fj fk flights
- florist fm fo foo foundation fr frl frogans fund furniture futbol ga gal gallery
- gb gbiz gd ge gent gf gg gh gi gift gifts gives gl glass global globo gm gmail
- gmo gn gop gov gp gq gr graphics gratis green gripe gs gt gu guide guitars guru
- gw gy hamburg haus healthcare help here hiphop hiv hk hm hn holdings holiday
- homes horse host hosting house how hr ht hu id ie il im immo immobilien in
- industries info ing ink institute insure int international investments io iq ir
- is it je jetzt jm jo jobs joburg jp juegos kaufen ke kg kh ki kim kitchen kiwi
- km kn koeln kp kr krd kred kw ky kz la lacaixa land lawyer lb lc lease lgbt li
- life lighting limited limo link lk loans london lotto lr ls lt ltda lu luxe
- luxury lv ly ma maison management mango market marketing mc md me media meet
- melbourne meme menu mg mh miami mil mini mk ml mm mn mo mobi moda moe monash
- mortgage moscow motorcycles mov mp mq mr ms mt mu museum mv mw mx my mz na
- nagoya name navy nc ne net network neustar new nf ng ngo nhk ni ninja nl no np
- nr nra nrw nu nyc nz okinawa om ong onl ooo org organic otsuka ovh pa paris
- partners parts pe pf pg ph photo photography photos physio pics pictures pink
- pizza pk pl place plumbing pm pn post pr praxi press pro prod productions
- properties property ps pt pub pw py qa qpon quebec re realtor recipes red rehab
- reise reisen ren rentals repair report republican rest restaurant reviews rich
- rio ro rocks rodeo rs rsvp ru ruhr rw ryukyu sa saarland sarl sb sc sca scb
- schmidt schule scot sd se services sexy sg sh shiksha shoes si singles sj sk sl
- sm sn so social software sohu solar solutions soy space spiegel sr st su
- supplies supply support surf surgery suzuki sv sx sy systems sz tatar tattoo tax
- tc td technology tel tf tg th tienda tips tirol tj tk tl tm tn to today tokyo
- tools top town toys tr trade training travel tt tv tw tz ua ug uk university
- uno uol us uy uz va vacations vc ve vegas ventures versicherung vet vg vi viajes
- villas vision vlaanderen vn vodka vote voting voto voyage vu wales wang watch
- webcam website wed wf whoswho wien wiki williamhill works ws wtc wtf xn--1qqw23a
- xn--3bst00m xn--3ds443g xn--3e0b707e xn--45brj9c xn--4gbrim xn--55qw42g
- xn--55qx5d xn--6frz82g xn--6qq986b3xl xn--80adxhks xn--80ao21a xn--80asehdb
- xn--80aswg xn--90a3ac xn--c1avg xn--cg4bki xn--clchc0ea0b2g2a9gcd xn--czr694b
- xn--czru2d xn--d1acj3b xn--fiq228c5hs xn--fiq64b xn--fiqs8s xn--fiqz9s
- xn--fpcrj9c3d xn--fzc2c9e2c xn--gecrj9c xn--h2brj9c xn--i1b6b1a6a2e xn--io0a7i
- xn--j1amh xn--j6w193g xn--kprw13d xn--kpry57d xn--kput3i xn--l1acc
- xn--lgbbat1ad8j xn--mgb9awbf xn--mgba3a4f16a xn--mgbaam7a8h xn--mgbab2bd
- xn--mgbayh7gpa xn--mgbbh1a71e xn--mgbc0a9azcg xn--mgberp4a5d4ar xn--mgbx4cd0ab
- xn--ngbc5azd xn--nqv7f xn--nqv7fs00ema xn--o3cw4h xn--ogbpf8fl xn--p1ai
- xn--pgbs0dh xn--q9jyb4c xn--rhqv96g xn--s9brj9c xn--ses554g xn--unup4y xn--vhquv
- xn--wgbh1c xn--wgbl6a xn--xhq521b xn--xkc2al3hye2a xn--xkc2dl3a5ee0h
- xn--yfro4i67o xn--ygbi2ammx xn--zfr164b xxx xyz yachts yandex ye yokohama
- youtube yt za zm zone zw
- /) { $self->{valid_tlds}{lc $_} = 1; }
-
push (@cmds, {
setting => 'util_rb_tld',
is_admin => 1,
@@ -3615,7 +3554,6 @@ TLDs include things like com, net, org,
foreach (split(/\s+/, $value)) {
$self->{valid_tlds}{lc $_} = 1;
}
- dbg("config: added tld list - $value");
}
});
@@ -3626,175 +3564,6 @@ code. 2TLDs include things like co.uk,
=cut
- # DO NOT UPDATE THIS HARDCODED LIST!! It is only as fallback for
- # transitional period and to be removed later. TLDs are now maintained in
- # sa-update 20_aux_tlds.cf.
- foreach (qw/
- com.ac edu.ac gov.ac mil.ac net.ac org.ac nom.ad ac.ae co.ae com.ae gov.ae
- mil.ae name.ae net.ae org.ae pro.ae sch.ae com.af edu.af gov.af net.af
- co.ag com.ag net.ag nom.ag org.ag com.ai edu.ai gov.ai net.ai off.ai
- org.ai com.al edu.al gov.al net.al org.al com.an edu.an net.an org.an
- co.ao ed.ao gv.ao it.ao og.ao pb.ao com.ar edu.ar gov.ar int.ar mil.ar
- net.ar org.ar e164.arpa in-addr.arpa ip6.arpa iris.arpa uri.arpa urn.arpa
- ac.at co.at gv.at or.at priv.at act.au asn.au com.au conf.au csiro.au
- edu.au gov.au id.au info.au net.au nsw.au nt.au org.au otc.au oz.au qld.au
- sa.au tas.au telememo.au vic.au wa.au com.aw biz.az com.az edu.az gov.az
- info.az int.az mil.az name.az net.az org.az pp.az co.ba com.ba edu.ba
- gov.ba mil.ba net.ba org.ba rs.ba unbi.ba unsa.ba com.bb edu.bb gov.bb
- net.bb org.bb ac.bd com.bd edu.bd gov.bd mil.bd net.bd org.bd ac.be
- belgie.be dns.be fgov.be gov.bf biz.bh cc.bh com.bh edu.bh gov.bh info.bh
- net.bh org.bh com.bm edu.bm gov.bm net.bm org.bm com.bn edu.bn net.bn
- org.bn com.bo edu.bo gob.bo gov.bo int.bo mil.bo net.bo org.bo tv.bo
- adm.br adv.br agr.br am.br arq.br art.br ato.br bio.br bmd.br cim.br
- cng.br cnt.br com.br coop.br dpn.br eco.br ecn.br edu.br eng.br esp.br
- etc.br eti.br far.br fm.br fnd.br fot.br fst.br g12.br ggf.br gov.br
- imb.br ind.br inf.br jor.br lel.br mat.br med.br mil.br mus.br net.br
- nom.br not.br ntr.br odo.br org.br ppg.br pro.br psc.br psi.br qsl.br
- rec.br slg.br srv.br tmp.br trd.br tur.br tv.br vet.br zlg.br com.bs
- net.bs org.bs com.bt edu.bt gov.bt net.bt org.bt co.bw org.bw gov.by
- mil.by com.bz net.bz org.bz ab.ca bc.ca gc.ca mb.ca nb.ca nf.ca nl.ca
- ns.ca nt.ca nu.ca on.ca pe.ca qc.ca sk.ca yk.ca co.ck edu.ck gov.ck net.ck
- org.ck ac.cn ah.cn bj.cn com.cn cq.cn edu.cn fj.cn gd.cn gov.cn gs.cn
- gx.cn gz.cn ha.cn hb.cn he.cn hi.cn hk.cn hl.cn hn.cn jl.cn js.cn jx.cn
- ln.cn mo.cn net.cn nm.cn nx.cn org.cn qh.cn sc.cn sd.cn sh.cn sn.cn sx.cn
- tj.cn tw.cn xj.cn xz.cn yn.cn zj.cn arts.co com.co edu.co firm.co gov.co
- info.co int.co mil.co net.co nom.co org.co rec.co web.co lkd.co.im
- ltd.co.im plc.co.im co.cm com.cm net.cm au.com br.com cn.com de.com eu.com
- gb.com hu.com no.com qc.com ru.com sa.com se.com uk.com us.com uy.com
- za.com ac.cr co.cr ed.cr fi.cr go.cr or.cr sa.cr com.cu edu.cu gov.cu
- inf.cu net.cu org.cu gov.cx ac.cy biz.cy com.cy ekloges.cy gov.cy ltd.cy
- name.cy net.cy org.cy parliament.cy press.cy pro.cy tm.cy co.dk com.dm
- edu.dm gov.dm net.dm org.dm art.do com.do edu.do gob.do gov.do mil.do
- net.do org.do sld.do web.do art.dz asso.dz com.dz edu.dz gov.dz net.dz
- org.dz pol.dz com.ec edu.ec fin.ec gov.ec info.ec k12.ec med.ec mil.ec
- net.ec org.ec pro.ec gob.ec co.ee com.ee edu.ee fie.ee med.ee org.ee
- pri.ee com.eg edu.eg eun.eg gov.eg mil.eg net.eg org.eg sci.eg com.er
- edu.er gov.er ind.er mil.er net.er org.er com.es edu.es gob.es nom.es
- org.es biz.et com.et edu.et gov.et info.et name.et net.et org.et aland.fi
- ac.fj biz.fj com.fj gov.fj id.fj info.fj mil.fj name.fj net.fj org.fj
- pro.fj school.fj ac.fk co.fk com.fk gov.fk net.fk nom.fk org.fk tm.fr
- asso.fr nom.fr prd.fr presse.fr com.fr gouv.fr com.ge edu.ge gov.ge mil.ge
- net.ge org.ge pvt.ge ac.gg alderney.gg co.gg gov.gg guernsey.gg ind.gg
- ltd.gg net.gg org.gg sark.gg sch.gg com.gh edu.gh gov.gh mil.gh org.gh
- com.gi edu.gi gov.gi ltd.gi mod.gi org.gi ac.gn com.gn gov.gn net.gn
- org.gn asso.gp com.gp edu.gp net.gp org.gp com.gr edu.gr gov.gr net.gr
- org.gr com.gt edu.gt gob.gt ind.gt mil.gt net.gt org.gt com.gu edu.gu
- gov.gu mil.gu net.gu org.gu com.hk edu.hk gov.hk idv.hk net.hk org.hk
- com.hn edu.hn gob.hn mil.hn net.hn org.hn com.hr from.hr iz.hr name.hr
- adult.ht art.ht asso.ht com.ht coop.ht edu.ht firm.ht gouv.ht info.ht
- med.ht net.ht org.ht perso.ht pol.ht pro.ht rel.ht shop.ht 2000.hu
- agrar.hu bolt.hu casino.hu city.hu co.hu erotica.hu erotika.hu film.hu
- forum.hu games.hu hotel.hu info.hu ingatlan.hu jogasz.hu konyvelo.hu
- lakas.hu media.hu news.hu org.hu priv.hu reklam.hu sex.hu shop.hu sport.hu
- suli.hu szex.hu tm.hu tozsde.hu utazas.hu video.hu ac.id co.id go.id
- mil.id net.id or.id sch.id web.id gov.ie ac.il co.il gov.il idf.il k12.il
- muni.il net.il org.il ac.im co.im gov.im net.im nic.im org.im ac.in co.in
- edu.in ernet.in firm.in gen.in gov.in ind.in mil.in net.in nic.in org.in
- res.in com.io gov.io mil.io net.io org.io ac.ir co.ir gov.ir id.ir net.ir
- org.ir sch.ir edu.it gov.it ac.je co.je gov.je ind.je jersey.je ltd.je
- net.je org.je sch.je com.jm edu.jm gov.jm net.jm org.jm com.jo edu.jo
- gov.jo mil.jo net.jo org.jo ac.jp ad.jp aichi.jp akita.jp aomori.jp
- chiba.jp co.jp ed.jp ehime.jp fukui.jp fukuoka.jp fukushima.jp gifu.jp
- go.jp gov.jp gr.jp gunma.jp hiroshima.jp hokkaido.jp hyogo.jp ibaraki.jp
- ishikawa.jp iwate.jp kagawa.jp kagoshima.jp kanagawa.jp kanazawa.jp
- kawasaki.jp kitakyushu.jp kobe.jp kochi.jp kumamoto.jp kyoto.jp lg.jp
- matsuyama.jp mie.jp miyagi.jp miyazaki.jp nagano.jp nagasaki.jp nagoya.jp
- nara.jp ne.jp net.jp niigata.jp oita.jp okayama.jp okinawa.jp or.jp org.jp
- osaka.jp saga.jp saitama.jp sapporo.jp sendai.jp shiga.jp shimane.jp
- shizuoka.jp takamatsu.jp tochigi.jp tokushima.jp tokyo.jp tottori.jp
- toyama.jp utsunomiya.jp wakayama.jp yamagata.jp yamaguchi.jp yamanashi.jp
- yokohama.jp ac.ke co.ke go.ke ne.ke new.ke or.ke sc.ke com.kg edu.kg
- gov.kg mil.kg net.kg org.kg com.kh edu.kh gov.kh mil.kh net.kh org.kh
- per.kh ac.kr busan.kr chungbuk.kr chungnam.kr co.kr daegu.kr daejeon.kr
- es.kr gangwon.kr go.kr gwangju.kr gyeongbuk.kr gyeonggi.kr gyeongnam.kr
- hs.kr incheon.kr jeju.kr jeonbuk.kr jeonnam.kr kg.kr kyonggi.kr mil.kr
- ms.kr ne.kr or.kr pe.kr re.kr sc.kr seoul.kr ulsan.kr com.kw edu.kw gov.kw
- mil.kw net.kw org.kw com.ky edu.ky gov.ky net.ky org.ky com.kz edu.kz
- gov.kz mil.kz net.kz org.kz com.la net.la org.la com.lb edu.lb gov.lb
- mil.lb net.lb org.lb com.lc edu.lc gov.lc net.lc org.lc assn.lk com.lk
- edu.lk gov.lk grp.lk hotel.lk int.lk ltd.lk net.lk ngo.lk org.lk sch.lk
- soc.lk web.lk com.lr edu.lr gov.lr net.lr org.lr co.ls org.ls gov.lt
- mil.lt asn.lv com.lv conf.lv edu.lv gov.lv id.lv mil.lv net.lv org.lv
- biz.ly com.ly edu.ly gov.ly id.ly med.ly net.ly org.ly plc.ly sch.ly ac.ma
- co.ma gov.ma net.ma org.ma press.ma asso.mc tm.mc ac.me co.me edu.me
- gov.me its.me net.me org.me priv.me com.mg edu.mg gov.mg mil.mg nom.mg
- org.mg prd.mg tm.mg army.mil navy.mil com.mk org.mk com.mm edu.mm gov.mm
- net.mm org.mm edu.mn gov.mn org.mn com.mo edu.mo gov.mo net.mo org.mo
- music.mobi weather.mobi co.mp edu.mp gov.mp net.mp org.mp com.mt edu.mt
- gov.mt net.mt org.mt tm.mt uu.mt co.mu com.mu aero.mv biz.mv com.mv
- coop.mv edu.mv gov.mv info.mv int.mv mil.mv museum.mv name.mv net.mv
- org.mv pro.mv ac.mw co.mw com.mw coop.mw edu.mw gov.mw int.mw museum.mw
- net.mw org.mw com.mx edu.mx gob.mx net.mx org.mx com.my edu.my gov.my
- mil.my name.my net.my org.my alt.na com.na cul.na edu.na net.na org.na
- telecom.na unam.na com.nc net.nc org.nc de.net gb.net uk.net ac.ng com.ng
- edu.ng gov.ng net.ng org.ng sch.ng ac.ni biz.ni com.ni edu.ni gob.ni in.ni
- info.ni int.ni mil.ni net.ni nom.ni org.ni web.ni fhs.no folkebibl.no
- fylkesbibl.no herad.no idrett.no kommune.no mil.no museum.no priv.no
- stat.no tel.no vgs.no com.np edu.np gov.np mil.np net.np org.np biz.nr
- co.nr com.nr edu.nr fax.nr gov.nr info.nr mob.nr mobil.nr mobile.nr net.nr
- org.nr tel.nr tlf.nr ac.nz co.nz cri.nz geek.nz gen.nz govt.nz iwi.nz
- maori.nz mil.nz net.nz org.nz school.nz ac.om biz.om co.om com.om edu.om
- gov.om med.om mil.om mod.om museum.om net.om org.om pro.om sch.om dk.org
- eu.org abo.pa ac.pa com.pa edu.pa gob.pa ing.pa med.pa net.pa nom.pa
- org.pa sld.pa com.pe edu.pe gob.pe mil.pe net.pe nom.pe org.pe com.pf
- edu.pf org.pf ac.pg com.pg net.pg com.ph edu.ph gov.ph mil.ph net.ph
- ngo.ph org.ph biz.pk com.pk edu.pk fam.pk gob.pk gok.pk gon.pk gop.pk
- gos.pk gov.pk net.pk org.pk web.pk art.pl biz.pl com.pl edu.pl gov.pl
- info.pl mil.pl net.pl ngo.pl org.pl biz.pr com.pr edu.pr gov.pr info.pr
- isla.pr name.pr net.pr org.pr pro.pr cpa.pro law.pro med.pro com.ps edu.ps
- gov.ps net.ps org.ps plo.ps sec.ps com.pt edu.pt gov.pt int.pt net.pt
- nome.pt org.pt publ.pt com.py edu.py gov.py net.py org.py com.qa edu.qa
- gov.qa net.qa org.qa asso.re com.re nom.re arts.ro com.ro firm.ro info.ro
- nom.ro nt.ro org.ro rec.ro store.ro tm.ro www.ro ac.rs co.rs edu.rs gov.rs
- in.rs org.rs ac.ru com.ru edu.ru gov.ru int.ru mil.ru net.ru org.ru pp.ru
- ac.rw co.rw com.rw edu.rw gouv.rw gov.rw int.rw mil.rw net.rw com.sa
- edu.sa gov.sa med.sa net.sa org.sa pub.sa sch.sa com.sb edu.sb gov.sb
- net.sb org.sb com.sc edu.sc gov.sc net.sc org.sc com.sd edu.sd gov.sd
- info.sd med.sd net.sd org.sd sch.sd tv.sd ab.se ac.se bd.se brand.se c.se
- d.se e.se f.se fh.se fhsk.se fhv.se g.se h.se i.se k.se komforb.se
- kommunalforbund.se komvux.se lanarb.se lanbib.se m.se mil.se n.se
- naturbruksgymn.se o.se org.se parti.se pp.se press.se s.se sshn.se t.se
- tm.se u.se w.se x.se y.se z.se com.sg edu.sg gov.sg idn.sg net.sg org.sg
- per.sg com.sh edu.sh gov.sh mil.sh net.sh org.sh edu.sk gov.sk mil.sk
- co.st com.st consulado.st edu.st embaixada.st gov.st mil.st net.st org.st
- principe.st saotome.st store.st com.sv edu.sv gob.sv org.sv red.sv com.sy
- gov.sy net.sy org.sy at.tf bg.tf ca.tf ch.tf cz.tf de.tf edu.tf eu.tf
- int.tf net.tf pl.tf ru.tf sg.tf us.tf ac.th co.th go.th in.th mi.th net.th
- or.th ac.tj biz.tj co.tj com.tj edu.tj go.tj gov.tj int.tj mil.tj name.tj
- net.tj org.tj web.tj com.tn edunet.tn ens.tn fin.tn gov.tn ind.tn info.tn
- intl.tn nat.tn net.tn org.tn rnrt.tn rns.tn rnu.tn tourism.tn gov.to
- av.tr bbs.tr bel.tr biz.tr com.tr dr.tr edu.tr gen.tr gov.tr
- info.tr k12.tr mil.tr name.tr net.tr org.tr pol.tr tel.tr web.tr aero.tt
- at.tt au.tt be.tt biz.tt ca.tt co.tt com.tt coop.tt de.tt dk.tt edu.tt
- es.tt eu.tt fr.tt gov.tt info.tt int.tt it.tt jobs.tt mobi.tt museum.tt
- name.tt net.tt nic.tt org.tt pro.tt se.tt travel.tt uk.tt us.tt co.tv
- gov.tv club.tw com.tw ebiz.tw edu.tw game.tw gov.tw idv.tw mil.tw net.tw
- org.tw ac.tz co.tz go.tz ne.tz or.tz cherkassy.ua chernigov.ua
- chernovtsy.ua ck.ua cn.ua co.ua com.ua crimea.ua cv.ua dn.ua
- dnepropetrovsk.ua donetsk.ua dp.ua edu.ua gov.ua if.ua in.ua
- ivano-frankivsk.ua kh.ua kharkov.ua kherson.ua khmelnitskiy.ua kiev.ua
- kirovograd.ua km.ua kr.ua ks.ua kv.ua lg.ua lugansk.ua lutsk.ua lviv.ua
- mk.ua net.ua nikolaev.ua od.ua odessa.ua org.ua pl.ua poltava.ua rovno.ua
- rv.ua sebastopol.ua sumy.ua te.ua ternopil.ua uzhgorod.ua vinnica.ua vn.ua
- zaporizhzhe.ua zhitomir.ua zp.ua zt.ua ac.ug co.ug go.ug ne.ug or.ug sc.ug
- ac.uk bl.uk british-library.uk co.uk edu.uk gov.uk icnet.uk jet.uk ltd.uk
- me.uk mod.uk national-library-scotland.uk net.uk nhs.uk nic.uk nls.uk
- org.uk parliament.uk plc.uk police.uk sch.uk ak.us al.us ar.us az.us ca.us
- co.us ct.us dc.us de.us dni.us fed.us fl.us ga.us hi.us ia.us id.us il.us
- in.us isa.us kids.us ks.us ky.us la.us ma.us md.us me.us mi.us mn.us mo.us
- ms.us mt.us nc.us nd.us ne.us nh.us nj.us nm.us nsn.us nv.us ny.us oh.us
- ok.us or.us pa.us ri.us sc.us sd.us tn.us tx.us ut.us va.us vt.us wa.us
- wi.us wv.us wy.us com.uy edu.uy gub.uy mil.uy net.uy org.uy vatican.va
- arts.ve bib.ve co.ve com.ve edu.ve firm.ve gov.ve info.ve int.ve mil.ve
- net.ve nom.ve org.ve rec.ve store.ve tec.ve web.ve co.vi com.vi edu.vi
- gov.vi net.vi org.vi ac.vn biz.vn com.vn edu.vn gov.vn health.vn info.vn
- int.vn name.vn net.vn org.vn pro.vn ch.vu com.vu de.vu edu.vu fr.vu net.vu
- org.vu com.ws edu.ws gov.ws net.ws org.ws com.ye edu.ye gov.ye mil.ye
- net.ye org.ye ac.za alt.za bourse.za city.za co.za edu.za gov.za law.za
- mil.za net.za ngo.za nom.za org.za school.za tm.za web.za ac.zm co.zm
- com.zm edu.zm gov.zm org.zm sch.zm ac.zw co.zw gov.zw org.zw
- /) { $self->{two_level_domains}{lc $_} = 1; }
-
push (@cmds, {
setting => 'util_rb_2tld',
is_admin => 1,
@@ -3819,13 +3588,6 @@ code. 3TLDs include things like demon.c
=cut
- # DO NOT UPDATE THIS HARDCODED LIST!! It is only as fallback for
- # transitional period and to be removed later. TLDs are now maintained in
- # sa-update 20_aux_tlds.cf.
- foreach (qw/
- demon.co.uk esc.edu.ar lkd.co.im plc.co.im
- /) { $self->{three_level_domains}{lc $_} = 1; }
-
push (@cmds, {
setting => 'util_rb_3tld',
is_admin => 1,
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm Mon Oct 29 10:29:15 2018
@@ -155,7 +155,14 @@ sub new {
$self->register_eval_rule("check_freemail_header");
$self->register_eval_rule("check_freemail_body");
- # Need to init the regex here, utilizing registryboundaries->valid_tlds_re
+ return $self;
+}
+
+sub _init_email_regex {
+ my ($self) = @_;
+
+ dbg("initializing email regex");
+
# Some regexp tips courtesy of http://www.regular-expressions.info/email.html
# full email regex v0.02
$self->{email_regex} = qr/
@@ -168,10 +175,7 @@ sub new {
(?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
$self->{main}->{registryboundaries}->{valid_tlds_re} # ends with valid tld
)
- (?!(?:[a-z0-9-]|\.[a-z0-9])) # make sure domain ends here
/xi;
-
- return $self;
}
sub set_config {
@@ -276,7 +280,8 @@ sub finish_parsing_end {
my $doms = join('|', @domains);
$self->{freemail_domains_re} = qr/\@(?:${doms})$/;
$wcount = scalar @domains;
- undef %{$self->{freemail_temp_wc}};
+ undef $self->{freemail_temp_wc};
+ delete $self->{freemail_temp_wc};
}
my $count = scalar keys %{$self->{freemail_domains}};
@@ -293,6 +298,12 @@ sub finish_parsing_end {
$self->{freemail_available} = 0;
}
+ # valid_tlds_re will be available at finish_parsing_end, compile it now,
+ # we only need to do it once and before possible forking
+ if ($self->{freemail_available} && !$self->{email_regex}) {
+ $self->_init_email_regex();
+ }
+
return 0;
}
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm Mon Oct 29 10:29:15 2018
@@ -77,7 +77,26 @@ sub new {
$self->set_config($mailsa->{conf});
$self->register_eval_rule("check_hashbl_emails");
- # Need to init the regex here, utilizing registryboundaries->valid_tlds_re
+ return $self;
+}
+
+sub finish_parsing_end {
+ my ($self, $opts) = @_;
+
+ # valid_tlds_re will be available at finish_parsing_end, compile it now,
+ # we only need to do it once and before possible forking
+ if ($self->{hashbl_available} && !$self->{email_regex}) {
+ $self->_init_email_regex();
+ }
+
+ return 0;
+}
+
+sub _init_email_regex {
+ my ($self) = @_;
+
+ dbg("initializing email regex");
+
# Some regexp tips courtesy of http://www.regular-expressions.info/email.html
# full email regex v0.02
$self->{email_regex} = qr/
@@ -90,10 +109,7 @@ sub new {
(?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
$self->{main}->{registryboundaries}->{valid_tlds_re} # ends with valid tld
)
- (?!(?:[a-z0-9-]|\.[a-z0-9])) # make sure domain ends here
/xi;
-
- return $self;
}
sub set_config {
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm Mon Oct 29 10:29:15 2018
@@ -32,6 +32,8 @@ use re 'taint';
our @ISA = qw();
+use Mail::SpamAssassin::Logger;
+
# called from SpamAssassin->init() to create $self->{util_rb}
sub new {
my $class = shift;
@@ -45,14 +47,25 @@ sub new {
bless ($self, $class);
# Initialize valid_tlds_re for schemeless uri parsing, FreeMail etc
- if ($self->{conf}->{valid_tlds}) {
- my $tlds = join('|', keys %{$self->{conf}->{valid_tlds}});
+ if ($self->{conf}->{valid_tlds} && %{$self->{conf}->{valid_tlds}}) {
+ # International domain names are already in ASCII-compatible encoding (ACE)
+ my $tlds =
+ '(?<![a-zA-Z0-9-])'. # make sure tld starts at boundary
+ join('|', keys %{$self->{conf}->{valid_tlds}}).
+ '(?!(?:[a-zA-Z0-9-]|\.[a-zA-Z0-9]))'; # make sure it ends
# Perl 5.10+ trie optimizes lists, no need for fancy regex optimizing
- $self->{valid_tlds_re} = qr/(?:$tlds)/i;
+ if (eval { $self->{valid_tlds_re} = qr/$tlds/; 1; }) {
+ dbg("config: registryboundaries: %d tlds loaded",
+ scalar keys %{$self->{conf}->{valid_tlds}});
+ } else {
+ warn "config: registryboundaries: failed to compile valid_tlds_re: $@\n";
+ $self->{valid_tlds_re} = qr/no_tlds_defined/;
+ }
}
else {
# Failsafe in case no tlds defined, we don't want this to match everything..
$self->{valid_tlds_re} = qr/no_tlds_defined/;
+ warn "config: registryboundaries: no tlds defined, need to run sa-update\n";
}
$self;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin.pm Mon Oct 29 10:29:15 2018
@@ -428,7 +428,6 @@ sub new {
}
$self->{conf} ||= new Mail::SpamAssassin::Conf ($self);
- $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
$self->{plugins} = Mail::SpamAssassin::PluginHandler->new ($self);
$self->{save_pattern_hits} ||= 0;
@@ -2195,6 +2194,11 @@ sub call_plugins {
if ($self->{spamd} && $subname eq 'spamd_child_after_non_root') {
# set global dir now if spamd
$self->set_global_state_dir();
+ } elsif ($subname eq 'finish_parsing_end') {
+ # Initialize RegistryBoundaries, now that util_rb_tld etc from config is
+ # read. Plugins can also now use {valid_tlds_re} to one time compile
+ # regexes in finish_parsing_end.
+ $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
}
# safety net in case some plugin changes global settings, Bug 6218
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Mon Oct 29 10:29:15 2018
@@ -3620,67 +3620,6 @@ e.g. рф, ελ.
=cut
- # DO NOT UPDATE THIS HARDCODED LIST!! It is only as fallback for
- # transitional period and to be removed later. TLDs are now maintained in
- # sa-update 20_aux_tlds.cf.
- foreach (qw/
- ac academy accountants active actor ad ae aero af ag agency ai airforce al am an
- ao aq ar archi army arpa as asia associates at attorney au auction audio autos
- aw ax axa az ba bar bargains bayern bb bd be beer berlin best bf bg bh bi bid
- bike bio biz bj black blackfriday blue bm bmw bn bnpparibas bo boo boutique br
- brussels bs bt build builders business buzz bv bw by bz bzh ca cab camera camp
- cancerresearch capetown capital caravan cards care career careers cash cat
- catering cc cd center ceo cern cf cg ch cheap christmas church ci citic city ck
- cl claims cleaning click clinic clothing club cm cn co codes coffee college
- cologne com community company computer condos construction consulting
- contractors cooking cool coop country cr credit creditcard cruises cu cuisinella
- cv cw cx cy cymru cz dad dance dating day de deals degree democrat dental
- dentist desi diamonds diet digital direct directory discount dj dk dm dnp do
- domains durban dz eat ec edu education ee eg email engineer engineering
- enterprises equipment er es esq estate et eu eus events exchange expert exposed
- fail farm feedback fi finance financial fish fishing fitness fj fk flights
- florist fm fo foo foundation fr frl frogans fund furniture futbol ga gal gallery
- gb gbiz gd ge gent gf gg gh gi gift gifts gives gl glass global globo gm gmail
- gmo gn gop gov gp gq gr graphics gratis green gripe gs gt gu guide guitars guru
- gw gy hamburg haus healthcare help here hiphop hiv hk hm hn holdings holiday
- homes horse host hosting house how hr ht hu id ie il im immo immobilien in
- industries info ing ink institute insure int international investments io iq ir
- is it je jetzt jm jo jobs joburg jp juegos kaufen ke kg kh ki kim kitchen kiwi
- km kn koeln kp kr krd kred kw ky kz la lacaixa land lawyer lb lc lease lgbt li
- life lighting limited limo link lk loans london lotto lr ls lt ltda lu luxe
- luxury lv ly ma maison management mango market marketing mc md me media meet
- melbourne meme menu mg mh miami mil mini mk ml mm mn mo mobi moda moe monash
- mortgage moscow motorcycles mov mp mq mr ms mt mu museum mv mw mx my mz na
- nagoya name navy nc ne net network neustar new nf ng ngo nhk ni ninja nl no np
- nr nra nrw nu nyc nz okinawa om ong onl ooo org organic otsuka ovh pa paris
- partners parts pe pf pg ph photo photography photos physio pics pictures pink
- pizza pk pl place plumbing pm pn post pr praxi press pro prod productions
- properties property ps pt pub pw py qa qpon quebec re realtor recipes red rehab
- reise reisen ren rentals repair report republican rest restaurant reviews rich
- rio ro rocks rodeo rs rsvp ru ruhr rw ryukyu sa saarland sarl sb sc sca scb
- schmidt schule scot sd se services sexy sg sh shiksha shoes si singles sj sk sl
- sm sn so social software sohu solar solutions soy space spiegel sr st su
- supplies supply support surf surgery suzuki sv sx sy systems sz tatar tattoo tax
- tc td technology tel tf tg th tienda tips tirol tj tk tl tm tn to today tokyo
- tools top town toys tr trade training travel tt tv tw tz ua ug uk university
- uno uol us uy uz va vacations vc ve vegas ventures versicherung vet vg vi viajes
- villas vision vlaanderen vn vodka vote voting voto voyage vu wales wang watch
- webcam website wed wf whoswho wien wiki williamhill works ws wtc wtf xn--1qqw23a
- xn--3bst00m xn--3ds443g xn--3e0b707e xn--45brj9c xn--4gbrim xn--55qw42g
- xn--55qx5d xn--6frz82g xn--6qq986b3xl xn--80adxhks xn--80ao21a xn--80asehdb
- xn--80aswg xn--90a3ac xn--c1avg xn--cg4bki xn--clchc0ea0b2g2a9gcd xn--czr694b
- xn--czru2d xn--d1acj3b xn--fiq228c5hs xn--fiq64b xn--fiqs8s xn--fiqz9s
- xn--fpcrj9c3d xn--fzc2c9e2c xn--gecrj9c xn--h2brj9c xn--i1b6b1a6a2e xn--io0a7i
- xn--j1amh xn--j6w193g xn--kprw13d xn--kpry57d xn--kput3i xn--l1acc
- xn--lgbbat1ad8j xn--mgb9awbf xn--mgba3a4f16a xn--mgbaam7a8h xn--mgbab2bd
- xn--mgbayh7gpa xn--mgbbh1a71e xn--mgbc0a9azcg xn--mgberp4a5d4ar xn--mgbx4cd0ab
- xn--ngbc5azd xn--nqv7f xn--nqv7fs00ema xn--o3cw4h xn--ogbpf8fl xn--p1ai
- xn--pgbs0dh xn--q9jyb4c xn--rhqv96g xn--s9brj9c xn--ses554g xn--unup4y xn--vhquv
- xn--wgbh1c xn--wgbl6a xn--xhq521b xn--xkc2al3hye2a xn--xkc2dl3a5ee0h
- xn--yfro4i67o xn--ygbi2ammx xn--zfr164b xxx xyz yachts yandex ye yokohama
- youtube yt za zm zone zw
- /) { $self->{valid_tlds}{idn_to_ascii($_)} = 1 }
-
push (@cmds, {
setting => 'util_rb_tld',
is_admin => 1,
@@ -3695,7 +3634,6 @@ e.g. рф, ελ.
foreach (split(/\s+/, $value)) {
$self->{valid_tlds}{idn_to_ascii($_)} = 1;
}
- dbg("config: added tld list - $value");
}
});
@@ -3708,175 +3646,6 @@ labels encoded as UTF-8 octets.
=cut
- # DO NOT UPDATE THIS HARDCODED LIST!! It is only as fallback for
- # transitional period and to be removed later. TLDs are now maintained in
- # sa-update 20_aux_tlds.cf.
- foreach (qw/
- com.ac edu.ac gov.ac mil.ac net.ac org.ac nom.ad ac.ae co.ae com.ae gov.ae
- mil.ae name.ae net.ae org.ae pro.ae sch.ae com.af edu.af gov.af net.af
- co.ag com.ag net.ag nom.ag org.ag com.ai edu.ai gov.ai net.ai off.ai
- org.ai com.al edu.al gov.al net.al org.al com.an edu.an net.an org.an
- co.ao ed.ao gv.ao it.ao og.ao pb.ao com.ar edu.ar gov.ar int.ar mil.ar
- net.ar org.ar e164.arpa in-addr.arpa ip6.arpa iris.arpa uri.arpa urn.arpa
- ac.at co.at gv.at or.at priv.at act.au asn.au com.au conf.au csiro.au
- edu.au gov.au id.au info.au net.au nsw.au nt.au org.au otc.au oz.au qld.au
- sa.au tas.au telememo.au vic.au wa.au com.aw biz.az com.az edu.az gov.az
- info.az int.az mil.az name.az net.az org.az pp.az co.ba com.ba edu.ba
- gov.ba mil.ba net.ba org.ba rs.ba unbi.ba unsa.ba com.bb edu.bb gov.bb
- net.bb org.bb ac.bd com.bd edu.bd gov.bd mil.bd net.bd org.bd ac.be
- belgie.be dns.be fgov.be gov.bf biz.bh cc.bh com.bh edu.bh gov.bh info.bh
- net.bh org.bh com.bm edu.bm gov.bm net.bm org.bm com.bn edu.bn net.bn
- org.bn com.bo edu.bo gob.bo gov.bo int.bo mil.bo net.bo org.bo tv.bo
- adm.br adv.br agr.br am.br arq.br art.br ato.br bio.br bmd.br cim.br
- cng.br cnt.br com.br coop.br dpn.br eco.br ecn.br edu.br eng.br esp.br
- etc.br eti.br far.br fm.br fnd.br fot.br fst.br g12.br ggf.br gov.br
- imb.br ind.br inf.br jor.br lel.br mat.br med.br mil.br mus.br net.br
- nom.br not.br ntr.br odo.br org.br ppg.br pro.br psc.br psi.br qsl.br
- rec.br slg.br srv.br tmp.br trd.br tur.br tv.br vet.br zlg.br com.bs
- net.bs org.bs com.bt edu.bt gov.bt net.bt org.bt co.bw org.bw gov.by
- mil.by com.bz net.bz org.bz ab.ca bc.ca gc.ca mb.ca nb.ca nf.ca nl.ca
- ns.ca nt.ca nu.ca on.ca pe.ca qc.ca sk.ca yk.ca co.ck edu.ck gov.ck net.ck
- org.ck ac.cn ah.cn bj.cn com.cn cq.cn edu.cn fj.cn gd.cn gov.cn gs.cn
- gx.cn gz.cn ha.cn hb.cn he.cn hi.cn hk.cn hl.cn hn.cn jl.cn js.cn jx.cn
- ln.cn mo.cn net.cn nm.cn nx.cn org.cn qh.cn sc.cn sd.cn sh.cn sn.cn sx.cn
- tj.cn tw.cn xj.cn xz.cn yn.cn zj.cn arts.co com.co edu.co firm.co gov.co
- info.co int.co mil.co net.co nom.co org.co rec.co web.co lkd.co.im
- ltd.co.im plc.co.im co.cm com.cm net.cm au.com br.com cn.com de.com eu.com
- gb.com hu.com no.com qc.com ru.com sa.com se.com uk.com us.com uy.com
- za.com ac.cr co.cr ed.cr fi.cr go.cr or.cr sa.cr com.cu edu.cu gov.cu
- inf.cu net.cu org.cu gov.cx ac.cy biz.cy com.cy ekloges.cy gov.cy ltd.cy
- name.cy net.cy org.cy parliament.cy press.cy pro.cy tm.cy co.dk com.dm
- edu.dm gov.dm net.dm org.dm art.do com.do edu.do gob.do gov.do mil.do
- net.do org.do sld.do web.do art.dz asso.dz com.dz edu.dz gov.dz net.dz
- org.dz pol.dz com.ec edu.ec fin.ec gov.ec info.ec k12.ec med.ec mil.ec
- net.ec org.ec pro.ec gob.ec co.ee com.ee edu.ee fie.ee med.ee org.ee
- pri.ee com.eg edu.eg eun.eg gov.eg mil.eg net.eg org.eg sci.eg com.er
- edu.er gov.er ind.er mil.er net.er org.er com.es edu.es gob.es nom.es
- org.es biz.et com.et edu.et gov.et info.et name.et net.et org.et aland.fi
- ac.fj biz.fj com.fj gov.fj id.fj info.fj mil.fj name.fj net.fj org.fj
- pro.fj school.fj ac.fk co.fk com.fk gov.fk net.fk nom.fk org.fk tm.fr
- asso.fr nom.fr prd.fr presse.fr com.fr gouv.fr com.ge edu.ge gov.ge mil.ge
- net.ge org.ge pvt.ge ac.gg alderney.gg co.gg gov.gg guernsey.gg ind.gg
- ltd.gg net.gg org.gg sark.gg sch.gg com.gh edu.gh gov.gh mil.gh org.gh
- com.gi edu.gi gov.gi ltd.gi mod.gi org.gi ac.gn com.gn gov.gn net.gn
- org.gn asso.gp com.gp edu.gp net.gp org.gp com.gr edu.gr gov.gr net.gr
- org.gr com.gt edu.gt gob.gt ind.gt mil.gt net.gt org.gt com.gu edu.gu
- gov.gu mil.gu net.gu org.gu com.hk edu.hk gov.hk idv.hk net.hk org.hk
- com.hn edu.hn gob.hn mil.hn net.hn org.hn com.hr from.hr iz.hr name.hr
- adult.ht art.ht asso.ht com.ht coop.ht edu.ht firm.ht gouv.ht info.ht
- med.ht net.ht org.ht perso.ht pol.ht pro.ht rel.ht shop.ht 2000.hu
- agrar.hu bolt.hu casino.hu city.hu co.hu erotica.hu erotika.hu film.hu
- forum.hu games.hu hotel.hu info.hu ingatlan.hu jogasz.hu konyvelo.hu
- lakas.hu media.hu news.hu org.hu priv.hu reklam.hu sex.hu shop.hu sport.hu
- suli.hu szex.hu tm.hu tozsde.hu utazas.hu video.hu ac.id co.id go.id
- mil.id net.id or.id sch.id web.id gov.ie ac.il co.il gov.il idf.il k12.il
- muni.il net.il org.il ac.im co.im gov.im net.im nic.im org.im ac.in co.in
- edu.in ernet.in firm.in gen.in gov.in ind.in mil.in net.in nic.in org.in
- res.in com.io gov.io mil.io net.io org.io ac.ir co.ir gov.ir id.ir net.ir
- org.ir sch.ir edu.it gov.it ac.je co.je gov.je ind.je jersey.je ltd.je
- net.je org.je sch.je com.jm edu.jm gov.jm net.jm org.jm com.jo edu.jo
- gov.jo mil.jo net.jo org.jo ac.jp ad.jp aichi.jp akita.jp aomori.jp
- chiba.jp co.jp ed.jp ehime.jp fukui.jp fukuoka.jp fukushima.jp gifu.jp
- go.jp gov.jp gr.jp gunma.jp hiroshima.jp hokkaido.jp hyogo.jp ibaraki.jp
- ishikawa.jp iwate.jp kagawa.jp kagoshima.jp kanagawa.jp kanazawa.jp
- kawasaki.jp kitakyushu.jp kobe.jp kochi.jp kumamoto.jp kyoto.jp lg.jp
- matsuyama.jp mie.jp miyagi.jp miyazaki.jp nagano.jp nagasaki.jp nagoya.jp
- nara.jp ne.jp net.jp niigata.jp oita.jp okayama.jp okinawa.jp or.jp org.jp
- osaka.jp saga.jp saitama.jp sapporo.jp sendai.jp shiga.jp shimane.jp
- shizuoka.jp takamatsu.jp tochigi.jp tokushima.jp tokyo.jp tottori.jp
- toyama.jp utsunomiya.jp wakayama.jp yamagata.jp yamaguchi.jp yamanashi.jp
- yokohama.jp ac.ke co.ke go.ke ne.ke new.ke or.ke sc.ke com.kg edu.kg
- gov.kg mil.kg net.kg org.kg com.kh edu.kh gov.kh mil.kh net.kh org.kh
- per.kh ac.kr busan.kr chungbuk.kr chungnam.kr co.kr daegu.kr daejeon.kr
- es.kr gangwon.kr go.kr gwangju.kr gyeongbuk.kr gyeonggi.kr gyeongnam.kr
- hs.kr incheon.kr jeju.kr jeonbuk.kr jeonnam.kr kg.kr kyonggi.kr mil.kr
- ms.kr ne.kr or.kr pe.kr re.kr sc.kr seoul.kr ulsan.kr com.kw edu.kw gov.kw
- mil.kw net.kw org.kw com.ky edu.ky gov.ky net.ky org.ky com.kz edu.kz
- gov.kz mil.kz net.kz org.kz com.la net.la org.la com.lb edu.lb gov.lb
- mil.lb net.lb org.lb com.lc edu.lc gov.lc net.lc org.lc assn.lk com.lk
- edu.lk gov.lk grp.lk hotel.lk int.lk ltd.lk net.lk ngo.lk org.lk sch.lk
- soc.lk web.lk com.lr edu.lr gov.lr net.lr org.lr co.ls org.ls gov.lt
- mil.lt asn.lv com.lv conf.lv edu.lv gov.lv id.lv mil.lv net.lv org.lv
- biz.ly com.ly edu.ly gov.ly id.ly med.ly net.ly org.ly plc.ly sch.ly ac.ma
- co.ma gov.ma net.ma org.ma press.ma asso.mc tm.mc ac.me co.me edu.me
- gov.me its.me net.me org.me priv.me com.mg edu.mg gov.mg mil.mg nom.mg
- org.mg prd.mg tm.mg army.mil navy.mil com.mk org.mk com.mm edu.mm gov.mm
- net.mm org.mm edu.mn gov.mn org.mn com.mo edu.mo gov.mo net.mo org.mo
- music.mobi weather.mobi co.mp edu.mp gov.mp net.mp org.mp com.mt edu.mt
- gov.mt net.mt org.mt tm.mt uu.mt co.mu com.mu aero.mv biz.mv com.mv
- coop.mv edu.mv gov.mv info.mv int.mv mil.mv museum.mv name.mv net.mv
- org.mv pro.mv ac.mw co.mw com.mw coop.mw edu.mw gov.mw int.mw museum.mw
- net.mw org.mw com.mx edu.mx gob.mx net.mx org.mx com.my edu.my gov.my
- mil.my name.my net.my org.my alt.na com.na cul.na edu.na net.na org.na
- telecom.na unam.na com.nc net.nc org.nc de.net gb.net uk.net ac.ng com.ng
- edu.ng gov.ng net.ng org.ng sch.ng ac.ni biz.ni com.ni edu.ni gob.ni in.ni
- info.ni int.ni mil.ni net.ni nom.ni org.ni web.ni fhs.no folkebibl.no
- fylkesbibl.no herad.no idrett.no kommune.no mil.no museum.no priv.no
- stat.no tel.no vgs.no com.np edu.np gov.np mil.np net.np org.np biz.nr
- co.nr com.nr edu.nr fax.nr gov.nr info.nr mob.nr mobil.nr mobile.nr net.nr
- org.nr tel.nr tlf.nr ac.nz co.nz cri.nz geek.nz gen.nz govt.nz iwi.nz
- maori.nz mil.nz net.nz org.nz school.nz ac.om biz.om co.om com.om edu.om
- gov.om med.om mil.om mod.om museum.om net.om org.om pro.om sch.om dk.org
- eu.org abo.pa ac.pa com.pa edu.pa gob.pa ing.pa med.pa net.pa nom.pa
- org.pa sld.pa com.pe edu.pe gob.pe mil.pe net.pe nom.pe org.pe com.pf
- edu.pf org.pf ac.pg com.pg net.pg com.ph edu.ph gov.ph mil.ph net.ph
- ngo.ph org.ph biz.pk com.pk edu.pk fam.pk gob.pk gok.pk gon.pk gop.pk
- gos.pk gov.pk net.pk org.pk web.pk art.pl biz.pl com.pl edu.pl gov.pl
- info.pl mil.pl net.pl ngo.pl org.pl biz.pr com.pr edu.pr gov.pr info.pr
- isla.pr name.pr net.pr org.pr pro.pr cpa.pro law.pro med.pro com.ps edu.ps
- gov.ps net.ps org.ps plo.ps sec.ps com.pt edu.pt gov.pt int.pt net.pt
- nome.pt org.pt publ.pt com.py edu.py gov.py net.py org.py com.qa edu.qa
- gov.qa net.qa org.qa asso.re com.re nom.re arts.ro com.ro firm.ro info.ro
- nom.ro nt.ro org.ro rec.ro store.ro tm.ro www.ro ac.rs co.rs edu.rs gov.rs
- in.rs org.rs ac.ru com.ru edu.ru gov.ru int.ru mil.ru net.ru org.ru pp.ru
- ac.rw co.rw com.rw edu.rw gouv.rw gov.rw int.rw mil.rw net.rw com.sa
- edu.sa gov.sa med.sa net.sa org.sa pub.sa sch.sa com.sb edu.sb gov.sb
- net.sb org.sb com.sc edu.sc gov.sc net.sc org.sc com.sd edu.sd gov.sd
- info.sd med.sd net.sd org.sd sch.sd tv.sd ab.se ac.se bd.se brand.se c.se
- d.se e.se f.se fh.se fhsk.se fhv.se g.se h.se i.se k.se komforb.se
- kommunalforbund.se komvux.se lanarb.se lanbib.se m.se mil.se n.se
- naturbruksgymn.se o.se org.se parti.se pp.se press.se s.se sshn.se t.se
- tm.se u.se w.se x.se y.se z.se com.sg edu.sg gov.sg idn.sg net.sg org.sg
- per.sg com.sh edu.sh gov.sh mil.sh net.sh org.sh edu.sk gov.sk mil.sk
- co.st com.st consulado.st edu.st embaixada.st gov.st mil.st net.st org.st
- principe.st saotome.st store.st com.sv edu.sv gob.sv org.sv red.sv com.sy
- gov.sy net.sy org.sy at.tf bg.tf ca.tf ch.tf cz.tf de.tf edu.tf eu.tf
- int.tf net.tf pl.tf ru.tf sg.tf us.tf ac.th co.th go.th in.th mi.th net.th
- or.th ac.tj biz.tj co.tj com.tj edu.tj go.tj gov.tj int.tj mil.tj name.tj
- net.tj org.tj web.tj com.tn edunet.tn ens.tn fin.tn gov.tn ind.tn info.tn
- intl.tn nat.tn net.tn org.tn rnrt.tn rns.tn rnu.tn tourism.tn gov.to
- av.tr bbs.tr bel.tr biz.tr com.tr dr.tr edu.tr gen.tr gov.tr
- info.tr k12.tr mil.tr name.tr net.tr org.tr pol.tr tel.tr web.tr aero.tt
- at.tt au.tt be.tt biz.tt ca.tt co.tt com.tt coop.tt de.tt dk.tt edu.tt
- es.tt eu.tt fr.tt gov.tt info.tt int.tt it.tt jobs.tt mobi.tt museum.tt
- name.tt net.tt nic.tt org.tt pro.tt se.tt travel.tt uk.tt us.tt co.tv
- gov.tv club.tw com.tw ebiz.tw edu.tw game.tw gov.tw idv.tw mil.tw net.tw
- org.tw ac.tz co.tz go.tz ne.tz or.tz cherkassy.ua chernigov.ua
- chernovtsy.ua ck.ua cn.ua co.ua com.ua crimea.ua cv.ua dn.ua
- dnepropetrovsk.ua donetsk.ua dp.ua edu.ua gov.ua if.ua in.ua
- ivano-frankivsk.ua kh.ua kharkov.ua kherson.ua khmelnitskiy.ua kiev.ua
- kirovograd.ua km.ua kr.ua ks.ua kv.ua lg.ua lugansk.ua lutsk.ua lviv.ua
- mk.ua net.ua nikolaev.ua od.ua odessa.ua org.ua pl.ua poltava.ua rovno.ua
- rv.ua sebastopol.ua sumy.ua te.ua ternopil.ua uzhgorod.ua vinnica.ua vn.ua
- zaporizhzhe.ua zhitomir.ua zp.ua zt.ua ac.ug co.ug go.ug ne.ug or.ug sc.ug
- ac.uk bl.uk british-library.uk co.uk edu.uk gov.uk icnet.uk jet.uk ltd.uk
- me.uk mod.uk national-library-scotland.uk net.uk nhs.uk nic.uk nls.uk
- org.uk parliament.uk plc.uk police.uk sch.uk ak.us al.us ar.us az.us ca.us
- co.us ct.us dc.us de.us dni.us fed.us fl.us ga.us hi.us ia.us id.us il.us
- in.us isa.us kids.us ks.us ky.us la.us ma.us md.us me.us mi.us mn.us mo.us
- ms.us mt.us nc.us nd.us ne.us nh.us nj.us nm.us nsn.us nv.us ny.us oh.us
- ok.us or.us pa.us ri.us sc.us sd.us tn.us tx.us ut.us va.us vt.us wa.us
- wi.us wv.us wy.us com.uy edu.uy gub.uy mil.uy net.uy org.uy vatican.va
- arts.ve bib.ve co.ve com.ve edu.ve firm.ve gov.ve info.ve int.ve mil.ve
- net.ve nom.ve org.ve rec.ve store.ve tec.ve web.ve co.vi com.vi edu.vi
- gov.vi net.vi org.vi ac.vn biz.vn com.vn edu.vn gov.vn health.vn info.vn
- int.vn name.vn net.vn org.vn pro.vn ch.vu com.vu de.vu edu.vu fr.vu net.vu
- org.vu com.ws edu.ws gov.ws net.ws org.ws com.ye edu.ye gov.ye mil.ye
- net.ye org.ye ac.za alt.za bourse.za city.za co.za edu.za gov.za law.za
- mil.za net.za ngo.za nom.za org.za school.za tm.za web.za ac.zm co.zm
- com.zm edu.zm gov.zm org.zm sch.zm ac.zw co.zw gov.zw org.zw
- /) { $self->{two_level_domains}{idn_to_ascii($_)} = 1 }
-
push (@cmds, {
setting => 'util_rb_2tld',
is_admin => 1,
@@ -3903,13 +3672,6 @@ Unicode labels encoded as UTF-8 octets.
=cut
- # DO NOT UPDATE THIS HARDCODED LIST!! It is only as fallback for
- # transitional period and to be removed later. TLDs are now maintained in
- # sa-update 20_aux_tlds.cf.
- foreach (qw/
- demon.co.uk esc.edu.ar lkd.co.im plc.co.im
- /) { $self->{three_level_domains}{idn_to_ascii($_)} = 1 }
-
push (@cmds, {
setting => 'util_rb_3tld',
is_admin => 1,
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm Mon Oct 29 10:29:15 2018
@@ -155,7 +155,14 @@ sub new {
$self->register_eval_rule("check_freemail_header");
$self->register_eval_rule("check_freemail_body");
- # Need to init the regex here, utilizing registryboundaries->valid_tlds_re
+ return $self;
+}
+
+sub _init_email_regex {
+ my ($self) = @_;
+
+ dbg("initializing email regex");
+
# Some regexp tips courtesy of http://www.regular-expressions.info/email.html
# full email regex v0.02
$self->{email_regex} = qr/
@@ -168,10 +175,7 @@ sub new {
(?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
$self->{main}->{registryboundaries}->{valid_tlds_re} # ends with valid tld
)
- (?!(?:[a-z0-9-]|\.[a-z0-9])) # make sure domain ends here
/xi;
-
- return $self;
}
sub set_config {
@@ -276,7 +280,8 @@ sub finish_parsing_end {
my $doms = join('|', @domains);
$self->{freemail_domains_re} = qr/\@(?:${doms})$/;
$wcount = scalar @domains;
- undef %{$self->{freemail_temp_wc}};
+ undef $self->{freemail_temp_wc};
+ delete $self->{freemail_temp_wc};
}
my $count = scalar keys %{$self->{freemail_domains}};
@@ -293,6 +298,12 @@ sub finish_parsing_end {
$self->{freemail_available} = 0;
}
+ # valid_tlds_re will be available at finish_parsing_end, compile it now,
+ # we only need to do it once and before possible forking
+ if ($self->{freemail_available} && !$self->{email_regex}) {
+ $self->_init_email_regex();
+ }
+
return 0;
}
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm Mon Oct 29 10:29:15 2018
@@ -78,7 +78,26 @@ sub new {
$self->set_config($mailsa->{conf});
$self->register_eval_rule("check_hashbl_emails");
- # Need to init the regex here, utilizing registryboundaries->valid_tlds_re
+ return $self;
+}
+
+sub finish_parsing_end {
+ my ($self, $opts) = @_;
+
+ # valid_tlds_re will be available at finish_parsing_end, compile it now,
+ # we only need to do it once and before possible forking
+ if ($self->{hashbl_available} && !$self->{email_regex}) {
+ $self->_init_email_regex();
+ }
+
+ return 0;
+}
+
+sub _init_email_regex {
+ my ($self) = @_;
+
+ dbg("initializing email regex");
+
# Some regexp tips courtesy of http://www.regular-expressions.info/email.html
# full email regex v0.02
$self->{email_regex} = qr/
@@ -91,10 +110,7 @@ sub new {
(?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
$self->{main}->{registryboundaries}->{valid_tlds_re} # ends with valid tld
)
- (?!(?:[a-z0-9-]|\.[a-z0-9])) # make sure domain ends here
/xi;
-
- return $self;
}
sub set_config {
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm?rev=1845096&r1=1845095&r2=1845096&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm Mon Oct 29 10:29:15 2018
@@ -53,13 +53,23 @@ sub new {
# Initialize valid_tlds_re for schemeless uri parsing, FreeMail etc
if ($self->{conf}->{valid_tlds} && %{$self->{conf}->{valid_tlds}}) {
# International domain names are already in ASCII-compatible encoding (ACE)
- my $tlds = join('|', keys %{$self->{conf}->{valid_tlds}});
+ my $tlds =
+ '(?<![a-zA-Z0-9-])'. # make sure tld starts at boundary
+ join('|', keys %{$self->{conf}->{valid_tlds}}).
+ '(?!(?:[a-zA-Z0-9-]|\.[a-zA-Z0-9]))'; # make sure it ends
# Perl 5.10+ trie optimizes lists, no need for fancy regex optimizing
- $self->{valid_tlds_re} = qr/(?:$tlds)/i;
+ if (eval { $self->{valid_tlds_re} = qr/$tlds/; 1; }) {
+ dbg("config: registryboundaries: %d tlds loaded",
+ scalar keys %{$self->{conf}->{valid_tlds}});
+ } else {
+ warn "config: registryboundaries: failed to compile valid_tlds_re: $@\n";
+ $self->{valid_tlds_re} = qr/no_tlds_defined/;
+ }
}
else {
# Failsafe in case no tlds defined, we don't want this to match everything..
$self->{valid_tlds_re} = qr/no_tlds_defined/;
+ warn "config: registryboundaries: no tlds defined, need to run sa-update\n";
}
$self;