You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jq...@apache.org on 2015/04/06 19:52:12 UTC

svn commit: r1671621 - in /spamassassin/trunk: ./ lib/Mail/ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Plugin/ lib/Mail/SpamAssassin/Util/ t/

Author: jquinn
Date: Mon Apr  6 17:52:12 2015
New Revision: 1671621

URL: http://svn.apache.org/r1671621
Log:
big patch for Bug 6782 registrar boundaries in cf file

Added:
    spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm
Modified:
    spamassassin/trunk/MANIFEST
    spamassassin/trunk/lib/Mail/SpamAssassin.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTTPSMismatch.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm
    spamassassin/trunk/t/ip_addrs.t
    spamassassin/trunk/t/uri.t

Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/trunk/MANIFEST?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Mon Apr  6 17:52:12 2015
@@ -115,6 +115,7 @@ lib/Mail/SpamAssassin/Plugin/WLBLEval.pm
 lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm
 lib/Mail/SpamAssassin/PluginHandler.pm
 lib/Mail/SpamAssassin/Plugin/URILocalBL.pm
+lib/Mail/SpamAssassin/RegistryBoundaries.pm
 lib/Mail/SpamAssassin/Reporter.pm
 lib/Mail/SpamAssassin/SQLBasedAddrList.pm
 lib/Mail/SpamAssassin/SpamdForkScaling.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin.pm?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin.pm Mon Apr  6 17:52:12 2015
@@ -75,6 +75,7 @@ use Mail::SpamAssassin::PerMsgStatus;
 use Mail::SpamAssassin::Message;
 use Mail::SpamAssassin::PluginHandler;
 use Mail::SpamAssassin::DnsResolver;
+use Mail::SpamAssassin::RegistryBoundaries;
 use Mail::SpamAssassin::Util qw(untaint_var am_running_on_windows);
 use Mail::SpamAssassin::Util::ScopedTimer;
 
@@ -430,6 +431,7 @@ sub new {
   }
 
   $self->{conf} ||= new Mail::SpamAssassin::Conf ($self);
+  $self->{registryboundaries} = Mail::SpamAssassin::RegistryBoundaries->new ($self);
   $self->{plugins} = Mail::SpamAssassin::PluginHandler->new ($self);
 
   $self->{save_pattern_hits} ||= 0;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Mon Apr  6 17:52:12 2015
@@ -3473,13 +3473,72 @@ subdomain of the specified zone.
 
 =item util_rb_tld tld1 tld2 ...
 
-This option allows the addition of new TLDs to the RegistrarBoundaries code.
-Updates to the list usually happen when new versions of SpamAssassin are
-released, but sometimes it's necessary to add in new TLDs faster than a
-release can occur.  TLDs include things like com, net, org, etc.
+This option maintains the list of valid TLDs in the RegistryBoundaries code.
+TLDs include things like com, net, org, etc.
 
 =cut
 
+  # DO NOT UPDATE THIS HARDCODED LIST!! It exists only as a fallback for the
+  # transitional period and will be removed later.  TLDs are now maintained in
+  # sa-update 20_aux_tlds.cf.
+  foreach (qw/
+    ac academy accountants active actor ad ae aero af ag agency ai airforce al am an
+    ao aq ar archi army arpa as asia associates at attorney au auction audio autos
+    aw ax axa az ba bar bargains bayern bb bd be beer berlin best bf bg bh bi bid
+    bike bio biz bj black blackfriday blue bm bmw bn bnpparibas bo boo boutique br
+    brussels bs bt build builders business buzz bv bw by bz bzh ca cab camera camp
+    cancerresearch capetown capital caravan cards care career careers cash cat
+    catering cc cd center ceo cern cf cg ch cheap christmas church ci citic city ck
+    cl claims cleaning click clinic clothing club cm cn co codes coffee college
+    cologne com community company computer condos construction consulting
+    contractors cooking cool coop country cr credit creditcard cruises cu cuisinella
+    cv cw cx cy cymru cz dad dance dating day de deals degree democrat dental
+    dentist desi diamonds diet digital direct directory discount dj dk dm dnp do
+    domains durban dz eat ec edu education ee eg email engineer engineering
+    enterprises equipment er es esq estate et eu eus events exchange expert exposed
+    fail farm feedback fi finance financial fish fishing fitness fj fk flights
+    florist fm fo foo foundation fr frl frogans fund furniture futbol ga gal gallery
+    gb gbiz gd ge gent gf gg gh gi gift gifts gives gl glass global globo gm gmail
+    gmo gn gop gov gp gq gr graphics gratis green gripe gs gt gu guide guitars guru
+    gw gy hamburg haus healthcare help here hiphop hiv hk hm hn holdings holiday
+    homes horse host hosting house how hr ht hu id ie il im immo immobilien in
+    industries info ing ink institute insure int international investments io iq ir
+    is it je jetzt jm jo jobs joburg jp juegos kaufen ke kg kh ki kim kitchen kiwi
+    km kn koeln kp kr krd kred kw ky kz la lacaixa land lawyer lb lc lease lgbt li
+    life lighting limited limo link lk loans london lotto lr ls lt ltda lu luxe
+    luxury lv ly ma maison management mango market marketing mc md me media meet
+    melbourne meme menu mg mh miami mil mini mk ml mm mn mo mobi moda moe monash
+    mortgage moscow motorcycles mov mp mq mr ms mt mu museum mv mw mx my mz na
+    nagoya name navy nc ne net network neustar new nf ng ngo nhk ni ninja nl no np
+    nr nra nrw nu nyc nz okinawa om ong onl ooo org organic otsuka ovh pa paris
+    partners parts pe pf pg ph photo photography photos physio pics pictures pink
+    pizza pk pl place plumbing pm pn post pr praxi press pro prod productions
+    properties property ps pt pub pw py qa qpon quebec re realtor recipes red rehab
+    reise reisen ren rentals repair report republican rest restaurant reviews rich
+    rio ro rocks rodeo rs rsvp ru ruhr rw ryukyu sa saarland sarl sb sc sca scb
+    schmidt schule scot sd se services sexy sg sh shiksha shoes si singles sj sk sl
+    sm sn so social software sohu solar solutions soy space spiegel sr st su
+    supplies supply support surf surgery suzuki sv sx sy systems sz tatar tattoo tax
+    tc td technology tel tf tg th tienda tips tirol tj tk tl tm tn to today tokyo
+    tools top town toys tp tr trade training travel tt tv tw tz ua ug uk university
+    uno uol us uy uz va vacations vc ve vegas ventures versicherung vet vg vi viajes
+    villas vision vlaanderen vn vodka vote voting voto voyage vu wales wang watch
+    webcam website wed wf whoswho wien wiki williamhill works ws wtc wtf xn--1qqw23a
+    xn--3bst00m xn--3ds443g xn--3e0b707e xn--45brj9c xn--4gbrim xn--55qw42g
+    xn--55qx5d xn--6frz82g xn--6qq986b3xl xn--80adxhks xn--80ao21a xn--80asehdb
+    xn--80aswg xn--90a3ac xn--c1avg xn--cg4bki xn--clchc0ea0b2g2a9gcd xn--czr694b
+    xn--czru2d xn--d1acj3b xn--fiq228c5hs xn--fiq64b xn--fiqs8s xn--fiqz9s
+    xn--fpcrj9c3d xn--fzc2c9e2c xn--gecrj9c xn--h2brj9c xn--i1b6b1a6a2e xn--io0a7i
+    xn--j1amh xn--j6w193g xn--kprw13d xn--kpry57d xn--kput3i xn--l1acc
+    xn--lgbbat1ad8j xn--mgb9awbf xn--mgba3a4f16a xn--mgbaam7a8h xn--mgbab2bd
+    xn--mgbayh7gpa xn--mgbbh1a71e xn--mgbc0a9azcg xn--mgberp4a5d4ar xn--mgbx4cd0ab
+    xn--ngbc5azd xn--nqv7f xn--nqv7fs00ema xn--o3cw4h xn--ogbpf8fl xn--p1ai
+    xn--pgbs0dh xn--q9jyb4c xn--rhqv96g xn--s9brj9c xn--ses554g xn--unup4y xn--vhquv
+    xn--wgbh1c xn--wgbl6a xn--xhq521b xn--xkc2al3hye2a xn--xkc2dl3a5ee0h
+    xn--yfro4i67o xn--ygbi2ammx xn--zfr164b xxx xyz yachts yandex ye yokohama
+    youtube yt za zm zone zw
+    /) { $self->{valid_tlds}{lc $_} = 1; }
+
   push (@cmds, {
     setting => 'util_rb_tld',
     is_admin => 1,
@@ -3488,25 +3547,191 @@ release can occur.  TLDs include things
       unless (defined $value && $value !~ /^$/) {
 	return $MISSING_REQUIRED_VALUE;
       }
-      unless ($value =~ /^[a-zA-Z]+(?:\s+[a-zA-Z]+)*$/) {
+      unless ($value =~ /^[^\s.]+(?:\s+[^\s.]+)*$/) {
 	return $INVALID_VALUE;
       }
       foreach (split(/\s+/, $value)) {
-        $Mail::SpamAssassin::Util::RegistrarBoundaries::VALID_TLDS{lc $_} = 1;
+        $self->{valid_tlds}{lc $_} = 1;
       }
     }
   });
 
 =item util_rb_2tld 2tld-1.tld 2tld-2.tld ...
 
-This option allows the addition of new 2nd-level TLDs (2TLD) to the
-RegistrarBoundaries code.  Updates to the list usually happen when new
-versions of SpamAssassin are released, but sometimes it's necessary to add in
-new 2TLDs faster than a release can occur.  2TLDs include things like co.uk,
-fed.us, etc.
+This option maintains the list of valid 2nd-level TLDs in the RegistryBoundaries
+code.  2TLDs include things like co.uk, fed.us, etc.
 
 =cut
 
+  # DO NOT UPDATE THIS HARDCODED LIST!! It exists only as a fallback for the
+  # transitional period and will be removed later.  TLDs are now maintained in
+  # sa-update 20_aux_tlds.cf.
+  foreach (qw/
+    com.ac edu.ac gov.ac mil.ac net.ac org.ac nom.ad ac.ae co.ae com.ae gov.ae
+    mil.ae name.ae net.ae org.ae pro.ae sch.ae com.af edu.af gov.af net.af
+    co.ag com.ag net.ag nom.ag org.ag com.ai edu.ai gov.ai net.ai off.ai
+    org.ai com.al edu.al gov.al net.al org.al com.an edu.an net.an org.an
+    co.ao ed.ao gv.ao it.ao og.ao pb.ao com.ar edu.ar gov.ar int.ar mil.ar
+    net.ar org.ar e164.arpa in-addr.arpa ip6.arpa iris.arpa uri.arpa urn.arpa
+    ac.at co.at gv.at or.at priv.at act.au asn.au com.au conf.au csiro.au
+    edu.au gov.au id.au info.au net.au nsw.au nt.au org.au otc.au oz.au qld.au
+    sa.au tas.au telememo.au vic.au wa.au com.aw biz.az com.az edu.az gov.az
+    info.az int.az mil.az name.az net.az org.az pp.az co.ba com.ba edu.ba
+    gov.ba mil.ba net.ba org.ba rs.ba unbi.ba unsa.ba com.bb edu.bb gov.bb
+    net.bb org.bb ac.bd com.bd edu.bd gov.bd mil.bd net.bd org.bd ac.be
+    belgie.be dns.be fgov.be gov.bf biz.bh cc.bh com.bh edu.bh gov.bh info.bh
+    net.bh org.bh com.bm edu.bm gov.bm net.bm org.bm com.bn edu.bn net.bn
+    org.bn com.bo edu.bo gob.bo gov.bo int.bo mil.bo net.bo org.bo tv.bo
+    adm.br adv.br agr.br am.br arq.br art.br ato.br bio.br bmd.br cim.br
+    cng.br cnt.br com.br coop.br dpn.br eco.br ecn.br edu.br eng.br esp.br
+    etc.br eti.br far.br fm.br fnd.br fot.br fst.br g12.br ggf.br gov.br
+    imb.br ind.br inf.br jor.br lel.br mat.br med.br mil.br mus.br net.br
+    nom.br not.br ntr.br odo.br org.br ppg.br pro.br psc.br psi.br qsl.br
+    rec.br slg.br srv.br tmp.br trd.br tur.br tv.br vet.br zlg.br com.bs
+    net.bs org.bs com.bt edu.bt gov.bt net.bt org.bt co.bw org.bw gov.by
+    mil.by com.bz net.bz org.bz ab.ca bc.ca gc.ca mb.ca nb.ca nf.ca nl.ca
+    ns.ca nt.ca nu.ca on.ca pe.ca qc.ca sk.ca yk.ca co.ck edu.ck gov.ck net.ck
+    org.ck ac.cn ah.cn bj.cn com.cn cq.cn edu.cn fj.cn gd.cn gov.cn gs.cn
+    gx.cn gz.cn ha.cn hb.cn he.cn hi.cn hk.cn hl.cn hn.cn jl.cn js.cn jx.cn
+    ln.cn mo.cn net.cn nm.cn nx.cn org.cn qh.cn sc.cn sd.cn sh.cn sn.cn sx.cn
+    tj.cn tw.cn xj.cn xz.cn yn.cn zj.cn arts.co com.co edu.co firm.co gov.co
+    info.co int.co mil.co net.co nom.co org.co rec.co web.co lkd.co.im
+    ltd.co.im plc.co.im co.cm com.cm net.cm au.com br.com cn.com de.com eu.com
+    gb.com hu.com no.com qc.com ru.com sa.com se.com uk.com us.com uy.com
+    za.com ac.cr co.cr ed.cr fi.cr go.cr or.cr sa.cr com.cu edu.cu gov.cu
+    inf.cu net.cu org.cu gov.cx ac.cy biz.cy com.cy ekloges.cy gov.cy ltd.cy
+    name.cy net.cy org.cy parliament.cy press.cy pro.cy tm.cy co.dk com.dm
+    edu.dm gov.dm net.dm org.dm art.do com.do edu.do gob.do gov.do mil.do
+    net.do org.do sld.do web.do art.dz asso.dz com.dz edu.dz gov.dz net.dz
+    org.dz pol.dz com.ec edu.ec fin.ec gov.ec info.ec k12.ec med.ec mil.ec
+    net.ec org.ec pro.ec gob.ec co.ee com.ee edu.ee fie.ee med.ee org.ee
+    pri.ee com.eg edu.eg eun.eg gov.eg mil.eg net.eg org.eg sci.eg com.er
+    edu.er gov.er ind.er mil.er net.er org.er com.es edu.es gob.es nom.es
+    org.es biz.et com.et edu.et gov.et info.et name.et net.et org.et aland.fi
+    ac.fj biz.fj com.fj gov.fj id.fj info.fj mil.fj name.fj net.fj org.fj
+    pro.fj school.fj ac.fk co.fk com.fk gov.fk net.fk nom.fk org.fk tm.fr
+    asso.fr nom.fr prd.fr presse.fr com.fr gouv.fr com.ge edu.ge gov.ge mil.ge
+    net.ge org.ge pvt.ge ac.gg alderney.gg co.gg gov.gg guernsey.gg ind.gg
+    ltd.gg net.gg org.gg sark.gg sch.gg com.gh edu.gh gov.gh mil.gh org.gh
+    com.gi edu.gi gov.gi ltd.gi mod.gi org.gi ac.gn com.gn gov.gn net.gn
+    org.gn asso.gp com.gp edu.gp net.gp org.gp com.gr edu.gr gov.gr net.gr
+    org.gr com.gt edu.gt gob.gt ind.gt mil.gt net.gt org.gt com.gu edu.gu
+    gov.gu mil.gu net.gu org.gu com.hk edu.hk gov.hk idv.hk net.hk org.hk
+    com.hn edu.hn gob.hn mil.hn net.hn org.hn com.hr from.hr iz.hr name.hr
+    adult.ht art.ht asso.ht com.ht coop.ht edu.ht firm.ht gouv.ht info.ht
+    med.ht net.ht org.ht perso.ht pol.ht pro.ht rel.ht shop.ht 2000.hu
+    agrar.hu bolt.hu casino.hu city.hu co.hu erotica.hu erotika.hu film.hu
+    forum.hu games.hu hotel.hu info.hu ingatlan.hu jogasz.hu konyvelo.hu
+    lakas.hu media.hu news.hu org.hu priv.hu reklam.hu sex.hu shop.hu sport.hu
+    suli.hu szex.hu tm.hu tozsde.hu utazas.hu video.hu ac.id co.id go.id
+    mil.id net.id or.id sch.id web.id gov.ie ac.il co.il gov.il idf.il k12.il
+    muni.il net.il org.il ac.im co.im gov.im net.im nic.im org.im ac.in co.in
+    edu.in ernet.in firm.in gen.in gov.in ind.in mil.in net.in nic.in org.in
+    res.in com.io gov.io mil.io net.io org.io ac.ir co.ir gov.ir id.ir net.ir
+    org.ir sch.ir edu.it gov.it ac.je co.je gov.je ind.je jersey.je ltd.je
+    net.je org.je sch.je com.jm edu.jm gov.jm net.jm org.jm com.jo edu.jo
+    gov.jo mil.jo net.jo org.jo ac.jp ad.jp aichi.jp akita.jp aomori.jp
+    chiba.jp co.jp ed.jp ehime.jp fukui.jp fukuoka.jp fukushima.jp gifu.jp
+    go.jp gov.jp gr.jp gunma.jp hiroshima.jp hokkaido.jp hyogo.jp ibaraki.jp
+    ishikawa.jp iwate.jp kagawa.jp kagoshima.jp kanagawa.jp kanazawa.jp
+    kawasaki.jp kitakyushu.jp kobe.jp kochi.jp kumamoto.jp kyoto.jp lg.jp
+    matsuyama.jp mie.jp miyagi.jp miyazaki.jp nagano.jp nagasaki.jp nagoya.jp
+    nara.jp ne.jp net.jp niigata.jp oita.jp okayama.jp okinawa.jp or.jp org.jp
+    osaka.jp saga.jp saitama.jp sapporo.jp sendai.jp shiga.jp shimane.jp
+    shizuoka.jp takamatsu.jp tochigi.jp tokushima.jp tokyo.jp tottori.jp
+    toyama.jp utsunomiya.jp wakayama.jp yamagata.jp yamaguchi.jp yamanashi.jp
+    yokohama.jp ac.ke co.ke go.ke ne.ke new.ke or.ke sc.ke com.kg edu.kg
+    gov.kg mil.kg net.kg org.kg com.kh edu.kh gov.kh mil.kh net.kh org.kh
+    per.kh ac.kr busan.kr chungbuk.kr chungnam.kr co.kr daegu.kr daejeon.kr
+    es.kr gangwon.kr go.kr gwangju.kr gyeongbuk.kr gyeonggi.kr gyeongnam.kr
+    hs.kr incheon.kr jeju.kr jeonbuk.kr jeonnam.kr kg.kr kyonggi.kr mil.kr
+    ms.kr ne.kr or.kr pe.kr re.kr sc.kr seoul.kr ulsan.kr com.kw edu.kw gov.kw
+    mil.kw net.kw org.kw com.ky edu.ky gov.ky net.ky org.ky com.kz edu.kz
+    gov.kz mil.kz net.kz org.kz com.la net.la org.la com.lb edu.lb gov.lb
+    mil.lb net.lb org.lb com.lc edu.lc gov.lc net.lc org.lc assn.lk com.lk
+    edu.lk gov.lk grp.lk hotel.lk int.lk ltd.lk net.lk ngo.lk org.lk sch.lk
+    soc.lk web.lk com.lr edu.lr gov.lr net.lr org.lr co.ls org.ls gov.lt
+    mil.lt asn.lv com.lv conf.lv edu.lv gov.lv id.lv mil.lv net.lv org.lv
+    biz.ly com.ly edu.ly gov.ly id.ly med.ly net.ly org.ly plc.ly sch.ly ac.ma
+    co.ma gov.ma net.ma org.ma press.ma asso.mc tm.mc ac.me co.me edu.me
+    gov.me its.me net.me org.me priv.me com.mg edu.mg gov.mg mil.mg nom.mg
+    org.mg prd.mg tm.mg army.mil navy.mil com.mk org.mk com.mm edu.mm gov.mm
+    net.mm org.mm edu.mn gov.mn org.mn com.mo edu.mo gov.mo net.mo org.mo
+    music.mobi weather.mobi co.mp edu.mp gov.mp net.mp org.mp com.mt edu.mt
+    gov.mt net.mt org.mt tm.mt uu.mt co.mu com.mu aero.mv biz.mv com.mv
+    coop.mv edu.mv gov.mv info.mv int.mv mil.mv museum.mv name.mv net.mv
+    org.mv pro.mv ac.mw co.mw com.mw coop.mw edu.mw gov.mw int.mw museum.mw
+    net.mw org.mw com.mx edu.mx gob.mx net.mx org.mx com.my edu.my gov.my
+    mil.my name.my net.my org.my alt.na com.na cul.na edu.na net.na org.na
+    telecom.na unam.na com.nc net.nc org.nc de.net gb.net uk.net ac.ng com.ng
+    edu.ng gov.ng net.ng org.ng sch.ng ac.ni biz.ni com.ni edu.ni gob.ni in.ni
+    info.ni int.ni mil.ni net.ni nom.ni org.ni web.ni fhs.no folkebibl.no
+    fylkesbibl.no herad.no idrett.no kommune.no mil.no museum.no priv.no
+    stat.no tel.no vgs.no com.np edu.np gov.np mil.np net.np org.np biz.nr
+    co.nr com.nr edu.nr fax.nr gov.nr info.nr mob.nr mobil.nr mobile.nr net.nr
+    org.nr tel.nr tlf.nr ac.nz co.nz cri.nz geek.nz gen.nz govt.nz iwi.nz
+    maori.nz mil.nz net.nz org.nz school.nz ac.om biz.om co.om com.om edu.om
+    gov.om med.om mil.om mod.om museum.om net.om org.om pro.om sch.om dk.org
+    eu.org abo.pa ac.pa com.pa edu.pa gob.pa ing.pa med.pa net.pa nom.pa
+    org.pa sld.pa com.pe edu.pe gob.pe mil.pe net.pe nom.pe org.pe com.pf
+    edu.pf org.pf ac.pg com.pg net.pg com.ph edu.ph gov.ph mil.ph net.ph
+    ngo.ph org.ph biz.pk com.pk edu.pk fam.pk gob.pk gok.pk gon.pk gop.pk
+    gos.pk gov.pk net.pk org.pk web.pk art.pl biz.pl com.pl edu.pl gov.pl
+    info.pl mil.pl net.pl ngo.pl org.pl biz.pr com.pr edu.pr gov.pr info.pr
+    isla.pr name.pr net.pr org.pr pro.pr cpa.pro law.pro med.pro com.ps edu.ps
+    gov.ps net.ps org.ps plo.ps sec.ps com.pt edu.pt gov.pt int.pt net.pt
+    nome.pt org.pt publ.pt com.py edu.py gov.py net.py org.py com.qa edu.qa
+    gov.qa net.qa org.qa asso.re com.re nom.re arts.ro com.ro firm.ro info.ro
+    nom.ro nt.ro org.ro rec.ro store.ro tm.ro www.ro ac.rs co.rs edu.rs gov.rs
+    in.rs org.rs ac.ru com.ru edu.ru gov.ru int.ru mil.ru net.ru org.ru pp.ru
+    ac.rw co.rw com.rw edu.rw gouv.rw gov.rw int.rw mil.rw net.rw com.sa
+    edu.sa gov.sa med.sa net.sa org.sa pub.sa sch.sa com.sb edu.sb gov.sb
+    net.sb org.sb com.sc edu.sc gov.sc net.sc org.sc com.sd edu.sd gov.sd
+    info.sd med.sd net.sd org.sd sch.sd tv.sd ab.se ac.se bd.se brand.se c.se
+    d.se e.se f.se fh.se fhsk.se fhv.se g.se h.se i.se k.se komforb.se
+    kommunalforbund.se komvux.se lanarb.se lanbib.se m.se mil.se n.se
+    naturbruksgymn.se o.se org.se parti.se pp.se press.se s.se sshn.se t.se
+    tm.se u.se w.se x.se y.se z.se com.sg edu.sg gov.sg idn.sg net.sg org.sg
+    per.sg com.sh edu.sh gov.sh mil.sh net.sh org.sh edu.sk gov.sk mil.sk
+    co.st com.st consulado.st edu.st embaixada.st gov.st mil.st net.st org.st
+    principe.st saotome.st store.st com.sv edu.sv gob.sv org.sv red.sv com.sy
+    gov.sy net.sy org.sy at.tf bg.tf ca.tf ch.tf cz.tf de.tf edu.tf eu.tf
+    int.tf net.tf pl.tf ru.tf sg.tf us.tf ac.th co.th go.th in.th mi.th net.th
+    or.th ac.tj biz.tj co.tj com.tj edu.tj go.tj gov.tj int.tj mil.tj name.tj
+    net.tj org.tj web.tj com.tn edunet.tn ens.tn fin.tn gov.tn ind.tn info.tn
+    intl.tn nat.tn net.tn org.tn rnrt.tn rns.tn rnu.tn tourism.tn gov.to
+    gov.tp av.tr bbs.tr bel.tr biz.tr com.tr dr.tr edu.tr gen.tr gov.tr
+    info.tr k12.tr mil.tr name.tr net.tr org.tr pol.tr tel.tr web.tr aero.tt
+    at.tt au.tt be.tt biz.tt ca.tt co.tt com.tt coop.tt de.tt dk.tt edu.tt
+    es.tt eu.tt fr.tt gov.tt info.tt int.tt it.tt jobs.tt mobi.tt museum.tt
+    name.tt net.tt nic.tt org.tt pro.tt se.tt travel.tt uk.tt us.tt co.tv
+    gov.tv club.tw com.tw ebiz.tw edu.tw game.tw gov.tw idv.tw mil.tw net.tw
+    org.tw ac.tz co.tz go.tz ne.tz or.tz cherkassy.ua chernigov.ua
+    chernovtsy.ua ck.ua cn.ua co.ua com.ua crimea.ua cv.ua dn.ua
+    dnepropetrovsk.ua donetsk.ua dp.ua edu.ua gov.ua if.ua in.ua
+    ivano-frankivsk.ua kh.ua kharkov.ua kherson.ua khmelnitskiy.ua kiev.ua
+    kirovograd.ua km.ua kr.ua ks.ua kv.ua lg.ua lugansk.ua lutsk.ua lviv.ua
+    mk.ua net.ua nikolaev.ua od.ua odessa.ua org.ua pl.ua poltava.ua rovno.ua
+    rv.ua sebastopol.ua sumy.ua te.ua ternopil.ua uzhgorod.ua vinnica.ua vn.ua
+    zaporizhzhe.ua zhitomir.ua zp.ua zt.ua ac.ug co.ug go.ug ne.ug or.ug sc.ug
+    ac.uk bl.uk british-library.uk co.uk edu.uk gov.uk icnet.uk jet.uk ltd.uk
+    me.uk mod.uk national-library-scotland.uk net.uk nhs.uk nic.uk nls.uk
+    org.uk parliament.uk plc.uk police.uk sch.uk ak.us al.us ar.us az.us ca.us
+    co.us ct.us dc.us de.us dni.us fed.us fl.us ga.us hi.us ia.us id.us il.us
+    in.us isa.us kids.us ks.us ky.us la.us ma.us md.us me.us mi.us mn.us mo.us
+    ms.us mt.us nc.us nd.us ne.us nh.us nj.us nm.us nsn.us nv.us ny.us oh.us
+    ok.us or.us pa.us ri.us sc.us sd.us tn.us tx.us ut.us va.us vt.us wa.us
+    wi.us wv.us wy.us com.uy edu.uy gub.uy mil.uy net.uy org.uy vatican.va
+    arts.ve bib.ve co.ve com.ve edu.ve firm.ve gov.ve info.ve int.ve mil.ve
+    net.ve nom.ve org.ve rec.ve store.ve tec.ve web.ve co.vi com.vi edu.vi
+    gov.vi net.vi org.vi ac.vn biz.vn com.vn edu.vn gov.vn health.vn info.vn
+    int.vn name.vn net.vn org.vn pro.vn ch.vu com.vu de.vu edu.vu fr.vu net.vu
+    org.vu com.ws edu.ws gov.ws net.ws org.ws com.ye edu.ye gov.ye mil.ye
+    net.ye org.ye ac.za alt.za bourse.za city.za co.za edu.za gov.za law.za
+    mil.za net.za ngo.za nom.za org.za school.za tm.za web.za ac.zm co.zm
+    com.zm edu.zm gov.zm org.zm sch.zm ac.zw co.zw gov.zw org.zw
+    /) { $self->{two_level_domains}{lc $_} = 1; }
+
   push (@cmds, {
     setting => 'util_rb_2tld',
     is_admin => 1,
@@ -3519,21 +3744,25 @@ fed.us, etc.
 	return $INVALID_VALUE;
       }
       foreach (split(/\s+/, $value)) {
-        $Mail::SpamAssassin::Util::RegistrarBoundaries::TWO_LEVEL_DOMAINS{lc $_} = 1;
+        $self->{two_level_domains}{lc $_} = 1;
       }
     }
   });
 
 =item util_rb_3tld 3tld1.some.tld 3tld2.other.tld ...
 
-This option allows the addition of new 3rd-level TLDs (3TLD) to the
-RegistrarBoundaries code.  Updates to the list usually happen when new
-versions of SpamAssassin are released, but sometimes it's necessary to add in
-new 3TLDs faster than a release can occur.  3TLDs include things like
-demon.co.uk, plc.co.im, etc.
+This option maintains the list of valid 3rd-level TLDs in the RegistryBoundaries
+code.  3TLDs include things like demon.co.uk, plc.co.im, etc.
 
 =cut
 
+  # DO NOT UPDATE THIS HARDCODED LIST!! It exists only as a fallback for the
+  # transitional period and will be removed later.  TLDs are now maintained in
+  # sa-update 20_aux_tlds.cf.
+  foreach (qw/
+    demon.co.uk esc.edu.ar lkd.co.im plc.co.im
+    /) { $self->{three_level_domains}{lc $_} = 1; }
+
   push (@cmds, {
     setting => 'util_rb_3tld',
     is_admin => 1,
@@ -3542,12 +3771,34 @@ demon.co.uk, plc.co.im, etc.
       unless (defined $value && $value !~ /^$/) {
 	return $MISSING_REQUIRED_VALUE;
       }
-      unless ($value =~ /^[^\s.]+\.[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+)*$/) {
+      unless ($value =~ /^[^\s.]+\.[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+\.[^\s.]+)*$/) {
 	return $INVALID_VALUE;
       }
       foreach (split(/\s+/, $value)) {
-        $Mail::SpamAssassin::Util::RegistrarBoundaries::THREE_LEVEL_DOMAINS{lc $_} = 1;
+        $self->{three_level_domains}{lc $_} = 1;
+      }
+    }
+  });
+
+=item clear_util_rb
+
+Empties the internal lists of valid TLDs (including 2nd and 3rd level) which
+the RegistryBoundaries code uses.  Only useful if you want to override the
+standard lists supplied by sa-update.
+
+=cut
+
+  push (@cmds, {
+    setting => 'clear_util_rb',
+    type => $CONF_TYPE_NOARGS,
+    code => sub {  # resets all three TLD tables; any non-empty argument is rejected
+      my ($self, $key, $value, $line) = @_;
+      unless (!defined $value || $value eq '') {
+        return $INVALID_VALUE;
       }
+      $self->{valid_tlds} = {};           # fresh empty hash; "= ()" would store undef here
+      $self->{two_level_domains} = {};    # likewise for the 2nd-level table
+      $self->{three_level_domains} = {};  # likewise for the 3rd-level table
     }
   });
 
@@ -4872,6 +5123,7 @@ sub feature_yesno_takes_args { 1 }
 sub feature_bug6558_free { 1 }
 sub feature_edns { 1 }  # supports 'dns_options edns' config option
 sub feature_dns_query_restriction { 1 }  # supported config option
+sub feature_registryboundaries { 1 } # replaces deprecated registrarboundaries
 sub perl_min_version_5010000 { return $] >= 5.010000 }  # perl version check ("perl_version" not neatly backwards-compatible)
 
 ###########################################################################

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Mon Apr  6 17:52:12 2015
@@ -60,7 +60,6 @@ use Mail::SpamAssassin::Constants qw(:sa
 use Mail::SpamAssassin::AsyncLoop;
 use Mail::SpamAssassin::Conf;
 use Mail::SpamAssassin::Util qw(untaint_var uri_list_canonicalize);
-use Mail::SpamAssassin::Util::RegistrarBoundaries;
 use Mail::SpamAssassin::Timeout;
 use Mail::SpamAssassin::Logger;
 
@@ -2108,30 +2107,39 @@ sub get {
 #
 # bug 4522: ISO2022 format mail, most commonly Japanese SHIFT-JIS, inserts a three character escape sequence  ESC ( .
 
-# a hybrid of tbird and oe's  version of uri parsing
-my $tbirdstartdelim = '><"\'`,{[(|\s'  . "\x1b";  # The \x1b as per bug 4522
-my $iso2022shift = "\x1b" . '\(.';  # bug 4522
-my $tbirdenddelim = '><"`}\]{[|\s' . "\x1b";  # The \x1b as per bug 4522
-my $oeignoreatend = '-~!@#^&*()_+=:;\'?,.';
-my $nonASCII    = '\x80-\xff';
-
-# bug 7100: we allow a comma to delimit the end of an email address because it will never appear in a domain name, and
-# it's a common thing to find in text
-my $tbirdenddelimemail = $tbirdenddelim . ',(\'' . $nonASCII;  # tbird ignores non-ASCII mail addresses for now, until RFC changes
-my $tbirdenddelimplusat = $tbirdenddelimemail . '@';
-
-# valid TLDs
-my $tldsRE = $Mail::SpamAssassin::Util::RegistrarBoundaries::VALID_TLDS_RE;
-
-# knownscheme regexp looks for either a https?: or ftp: scheme, or www\d*\. or ftp\. prefix, i.e., likely to start a URL
-# schemeless regexp looks for a valid TLD at the end of what may be a FQDN, followed by optional ., optional :portnum, optional /rest_of_uri
-my $urischemeless = qr/[a-z\d][a-z\d._-]{0,251}\.${tldsRE}\.?(?::\d{1,5})?(?:\/[^$tbirdenddelim]{1,251})?/io;
-my $uriknownscheme = qr/(?:(?:(?:(?:https?)|(?:ftp)):(?:\/\/)?)|(?:(?:www\d{0,2}|ftp)\.))[^$tbirdenddelim]{1,251}/io;
-my $urimailscheme = qr/(?:mailto:)?[^$tbirdenddelimplusat]{1,251}@[^$tbirdenddelimemail]{1,251}/io;
-my $tbirdurire = qr/(?:\b|(?<=$iso2022shift)|(?<=[$tbirdstartdelim]))
-                    (?:(?:($uriknownscheme)(?=(?:[$tbirdenddelim]|\z))) |
-                       (?:($urimailscheme)(?=(?:[$tbirdenddelimemail]|\z))) |
-                       (?:\b($urischemeless)(?=(?:[$tbirdenddelim]|\z))))/xo;
+sub _tbirdurire {
+  my ($self) = @_;
+  # Returns the compiled regexp that spots URIs in text: capture 1 = known-scheme URI, 2 = mail address, 3 = schemeless host.
+  # Already built for this object?  Then reuse it.
+  return $self->{tbirdurire} if $self->{tbirdurire};
+
+  # a hybrid of tbird and oe's  version of uri parsing
+  my $tbirdstartdelim = '><"\'`,{[(|\s'  . "\x1b";  # The \x1b as per bug 4522
+  my $iso2022shift = "\x1b" . '\(.';  # bug 4522
+  my $tbirdenddelim = '><"`}\]{[|\s' . "\x1b";  # The \x1b as per bug 4522
+  my $nonASCII    = '\x80-\xff';
+
+  # bug 7100: we allow a comma to delimit the end of an email address because it will never appear in a domain name, and
+  # it's a common thing to find in text
+  my $tbirdenddelimemail = $tbirdenddelim . ',(\'' . $nonASCII;  # tbird ignores non-ASCII mail addresses for now, until RFC changes
+  my $tbirdenddelimplusat = $tbirdenddelimemail . '@';
+
+  # valid TLDs, read from this object's main->registryboundaries
+  my $tldsRE = $self->{main}->{registryboundaries}->{valid_tlds_re};
+  # NOTE: none of the qr// below may use /o -- it compiles the pattern once per process and would ignore a different $tldsRE on later objects.
+  # knownscheme regexp looks for either a https?: or ftp: scheme, or www\d*\. or ftp\. prefix, i.e., likely to start a URL
+  # schemeless regexp looks for a valid TLD at the end of what may be a FQDN, followed by optional ., optional :portnum, optional /rest_of_uri
+  my $urischemeless = qr/[a-z\d][a-z\d._-]{0,251}\.${tldsRE}\.?(?::\d{1,5})?(?:\/[^$tbirdenddelim]{1,251})?/i;
+  my $uriknownscheme = qr/(?:(?:(?:(?:https?)|(?:ftp)):(?:\/\/)?)|(?:(?:www\d{0,2}|ftp)\.))[^$tbirdenddelim]{1,251}/i;
+  my $urimailscheme = qr/(?:mailto:)?[^$tbirdenddelimplusat]{1,251}@[^$tbirdenddelimemail]{1,251}/i;
+  # NOTE(review): a caller that matches the result with /o (e.g. /$re/igo) still pins the first pattern it sees -- confirm call sites.
+  $self->{tbirdurire} = qr/(?:\b|(?<=$iso2022shift)|(?<=[$tbirdstartdelim]))
+                        (?:(?:($uriknownscheme)(?=(?:[$tbirdenddelim]|\z))) |
+                        (?:($urimailscheme)(?=(?:[$tbirdenddelimemail]|\z))) |
+                        (?:\b($urischemeless)(?=(?:[$tbirdenddelim]|\z))))/x;
+
+  return $self->{tbirdurire};
+}
 
 =item $status->get_uri_list ()
 
@@ -2265,7 +2273,7 @@ sub get_uri_detail_list {
     $info->{cleaned} = \@tmp;
 
     foreach (@tmp) {
-      my($domain,$host) = Mail::SpamAssassin::Util::uri_to_domain($_);
+      my($domain,$host) = $self->{main}->{registryboundaries}->uri_to_domain($_);
       if (defined $host && $host ne '' && !$info->{hosts}->{$host}) {
         # unstripped full host name as a key, and its domain part as a value
         $info->{hosts}->{$host} = $domain;
@@ -2306,7 +2314,7 @@ sub get_uri_detail_list {
       $info->{cleaned} = \@uris;
 
       foreach (@uris) {
-        my($domain,$host) = Mail::SpamAssassin::Util::uri_to_domain($_);
+        my($domain,$host) = $self->{main}->{registryboundaries}->uri_to_domain($_);
         if (defined $host && $host ne '' && !$info->{hosts}->{$host}) {
           # unstripped full host name as a key, and its domain part as a value
           $info->{hosts}->{$host} = $domain;
@@ -2354,6 +2362,7 @@ sub _get_parsed_uri_list {
 
     my ($rulename, $pat, @uris);
     my $text;
+    my $tbirdurire = $self->_tbirdurire;
 
     for my $entry (@$textary) {
 
@@ -2369,7 +2378,7 @@ sub _get_parsed_uri_list {
       while (/$tbirdurire/igo) {
         my $rawuri = $1||$2||$3;
         $rawuri =~ s/(^[^(]*)\).*$/$1/;  # as per ThunderBird, ) is an end delimiter if there is no ( preceeding it
-        $rawuri =~ s/[$oeignoreatend]*$//; # remove trailing string of punctuations that TBird ignores
+        $rawuri =~ s/[-~!@#^&*()_+=:;\'?,.]*$//; # remove trailing string of punctuations that TBird ignores
         # skip if there is '..' in the hostname portion of the URI, something we can't catch in the general URI regexp
         next if $rawuri =~ /^(?:(?:https?|ftp|mailto):(?:\/\/)?)?[a-z\d.-]*\.\./i;
 
@@ -2400,7 +2409,7 @@ sub _get_parsed_uri_list {
           # skip a mail link that does not have a valid TLD or other than one @ after decoding any URLEncoded characters
           $uri = Mail::SpamAssassin::Util::url_encode($uri) if ($uri =~ /\%(?:2[1-9a-fA-F]|[3-6][0-9a-fA-F]|7[0-9a-eA-E])/);
           next if ($uri !~ /^[^@]+@[^@]+$/);
-          my $domuri = Mail::SpamAssassin::Util::uri_to_domain($uri);
+          my $domuri = $self->{main}->{registryboundaries}->uri_to_domain($uri);
           next unless $domuri;
           push (@uris, $rawuri);
           push (@uris, $uri) unless ($rawuri eq $uri);
@@ -2411,7 +2420,7 @@ sub _get_parsed_uri_list {
         my @tmp = uri_list_canonicalize($redirector_patterns, $uri);
         my $goodurifound = 0;
         foreach my $cleanuri (@tmp) {
-          my $domain = Mail::SpamAssassin::Util::uri_to_domain($cleanuri);
+          my $domain = $self->{main}->{registryboundaries}->uri_to_domain($cleanuri);
           if ($domain) {
             # bug 5780: Stop after domain to avoid FP, but do that after all deobfuscation of urlencoding and redirection
             if ($rblonly) {
@@ -3070,7 +3079,7 @@ sub all_from_addrs_domains {
 
   #loop through and limit to just the domain with a dummy address
   for (my $i = 0; $i < scalar(@addrs); $i++) {
-    $addrs[$i] = 'dummy@'.&Mail::SpamAssassin::Util::uri_to_domain($addrs[$i]);
+    $addrs[$i] = 'dummy@'.$self->{main}->{registryboundaries}->uri_to_domain($addrs[$i]);  # plain method call (no & sigil); scalar context yields the domain only
   }
 
   #Remove duplicate domains

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/FreeMail.pm Mon Apr  6 17:52:12 2015
@@ -110,30 +110,12 @@ my $VERSION = 2.002;
 
 use Mail::SpamAssassin::Plugin;
 use Mail::SpamAssassin::PerMsgStatus;
-use Mail::SpamAssassin::Util::RegistrarBoundaries;
-use vars qw(@ISA);
-@ISA = qw(Mail::SpamAssassin::Plugin);
 
-# List of TLDs from RegistrarBoundaries.pm
-my $tlds = $Mail::SpamAssassin::Util::RegistrarBoundaries::VALID_TLDS_RE;
+use vars qw(@ISA $email_whitelist $skip_replyto_envfrom);
+@ISA = qw(Mail::SpamAssassin::Plugin);
 
-### Some regexp tips courtesy of http://www.regular-expressions.info/email.html
-### v 0.02
-# full email regex
-my $email_regex = qr/
-  (?=.{0,64}\@)				# limit userpart to 64 chars (and speed up searching?)
-  (?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-])	# start boundary
-  (					# capture email
-  [a-z0-9!#\$%&'*+\/=?^_`{|}~-]+	# no dot in beginning
-  (?:\.[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+)*	# no consecutive dots, no ending dot
-  \@
-  (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
-  ${tlds}				# ends with valid tld
-  )
-  (?!(?:[a-z0-9-]|\.[a-z0-9]))		# make sure domain ends here
-/xi;
 # default email whitelist
-my $email_whitelist = qr/
+$email_whitelist = qr/
   ^(?:
       abuse|support|sales|info|helpdesk|contact|kontakt
     | (?:post|host|domain)master
@@ -147,7 +129,6 @@ my $email_whitelist = qr/
 
 # skip replyto check when envelope sender is
 # allow <> for now
-my $skip_replyto_envfrom;
 { # no re "strict";  # since perl 5.21.8: Ranges of ASCII printables...
   $skip_replyto_envfrom = qr/
   (?:
@@ -161,7 +142,6 @@ my $skip_replyto_envfrom;
 /xi;
 }
 
-
 sub dbg { Mail::SpamAssassin::Plugin::dbg ("FreeMail: @_"); }
 
 sub new {
@@ -178,6 +158,22 @@ sub new {
     $self->register_eval_rule("check_freemail_header");
     $self->register_eval_rule("check_freemail_body");
 
+    # Need to init the regex here, utilizing registryboundaries->valid_tlds_re
+    # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
+    # full email regex v0.02
+    $self->{email_regex} = qr/
+      (?=.{0,64}\@)				# limit userpart to 64 chars (and speed up searching?)
+      (?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-])	# start boundary
+      (						# capture email
+      [a-z0-9!#\$%&'*+\/=?^_`{|}~-]+		# no dot in beginning
+      (?:\.[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+)*	# no consecutive dots, no ending dot
+      \@
+      (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
+      $self->{main}->{registryboundaries}->{valid_tlds_re}	# ends with valid tld
+      )
+      (?!(?:[a-z0-9-]|\.[a-z0-9]))		# make sure domain ends here
+    /xi;
+
     return $self;
 }
 
@@ -333,7 +329,7 @@ sub _parse_body {
         my $parsed = $pms->get_uri_detail_list();
         while (my($uri, $info) = each %{$parsed}) {
             if (defined $info->{types}->{a} and not defined $info->{types}->{parsed}) {
-                if ($uri =~ /^(?:(?i)mailto):${email_regex}/) {
+                if ($uri =~ /^(?:(?i)mailto):$self->{email_regex}/) {
                     my $email = lc($1);
                     push(@body_emails, $email) unless defined $seen{$email};
                     $seen{$email} = 1;
@@ -349,8 +345,8 @@ sub _parse_body {
             s#<?https?://\S{0,255}(?:\@|%40)\S{0,255}# #gi;
             # strip emails contained in <>, not mailto:
             # also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
-            s#<?(?<!mailto:)${email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
-            while (/$email_regex/g) {
+            s#<?(?<!mailto:)$self->{email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
+            while (/$self->{email_regex}/g) {
                 my $email = lc($1);
                 push(@body_emails, $email) unless defined $seen{$email};
                 $seen{$email} = 1;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTTPSMismatch.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTTPSMismatch.pm?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTTPSMismatch.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTTPSMismatch.pm Mon Apr  6 17:52:12 2015
@@ -73,8 +73,8 @@ sub check_https_http_mismatch {
 
 	  # want to compare whole hostnames instead of domains?
 	  # comment this next section to the blank line.
-	  $uri = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($uri);
-          undef $uri unless (Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($uri));
+	  $uri = $self->{main}->{registryboundaries}->trim_domain($uri);
+          undef $uri unless ($self->{main}->{registryboundaries}->is_domain_valid($uri));
 
 	  last if $uri;
         }
@@ -90,8 +90,8 @@ sub check_https_http_mismatch {
 	  # want to compare whole hostnames instead of domains?
 	  # comment this next section to the blank line.
           if ($https !~ /^\d+\.\d+\.\d+\.\d+$/) {
-	    $https = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($https);
-            undef $https unless (Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($https));
+	    $https = $self->{main}->{registryboundaries}->trim_domain($https);
+            undef $https unless ($self->{main}->{registryboundaries}->is_domain_valid($https));
           }
 	  next unless $https;
 

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm Mon Apr  6 17:52:12 2015
@@ -1037,9 +1037,9 @@ sub check_ratware_envelope_from {
 
   if ($to =~ /^([^@]+)@(.+)$/) {
     my($user,$dom) = ($1,$2);
-    $dom = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($dom);
+    $dom = $self->{main}->{registryboundaries}->trim_domain($dom);
     return unless
-        (Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($dom));
+        ($self->{main}->{registryboundaries}->is_domain_valid($dom));
 
     return 1 if ($from =~ /\b\Q$dom\E.\Q$user\E@/i);
   }

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm Mon Apr  6 17:52:12 2015
@@ -295,7 +295,6 @@ package Mail::SpamAssassin::Plugin::URID
 use Mail::SpamAssassin::Plugin;
 use Mail::SpamAssassin::Constants qw(:ip);
 use Mail::SpamAssassin::Util;
-use Mail::SpamAssassin::Util::RegistrarBoundaries;
 use Mail::SpamAssassin::Logger;
 use strict;
 use warnings;
@@ -961,7 +960,7 @@ sub complete_ns_lookup {
           $seen_lookups->{'A:'.$nsmatch} = 1;
           $self->lookup_a_record($pms, $ent->{obj}, $nsmatch);
         }
-        $nsrhblstr = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($nsmatch);
+        $nsrhblstr = $self->{main}->{registryboundaries}->trim_domain($nsmatch);
       }
 
       foreach my $rulename (keys %{$nsrhsblrules}) {

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIEval.pm Mon Apr  6 17:52:12 2015
@@ -55,7 +55,7 @@ sub check_for_http_redirector {
     while (s{^https?://([^/:\?]+).+?(https?:/{0,2}?([^/:\?]+).*)$}{$2}i) {
       my ($redir, $dest) = ($1, $3);
       foreach ($redir, $dest) {
-	$_ = Mail::SpamAssassin::Util::uri_to_domain(lc($_)) || $_;
+	$_ = $self->{main}->{registryboundaries}->uri_to_domain($_) || $_;
       }
       next if ($redir eq $dest);
       dbg("eval: redirect: found $redir to $dest, flagging");

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WLBLEval.pm Mon Apr  6 17:52:12 2015
@@ -262,14 +262,14 @@ sub _check_addr_matches_rcvd {
   return 0  if !@relays;
 
   my($adrh,$adrd) =
-    Mail::SpamAssassin::Util::RegistrarBoundaries::split_domain($addr_domain);
+    $self->{main}->{registryboundaries}->split_domain($addr_domain);
   my $match = 0;
   my $any_tried = 0;
   foreach my $rly (@relays) {
     my $relay_rdns = $rly->{lc_rdns};
     next  if !defined $relay_rdns || $relay_rdns eq '';
     my($rlyh,$rlyd) =
-      Mail::SpamAssassin::Util::RegistrarBoundaries::split_domain($relay_rdns);
+      $self->{main}->{registryboundaries}->split_domain($relay_rdns);
     $any_tried = 1;
     if ($adrd eq $rlyd) {
       dbg("rules: $addr MATCHES relay $relay_rdns ($adrd)");

Added: spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm?rev=1671621&view=auto
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm (added)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm Mon Apr  6 17:52:12 2015
@@ -0,0 +1,241 @@
+# The (extremely complex) rules for domain delegation.
+
+# <@LICENSE>
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# </@LICENSE>
+
+=head1 NAME
+
+Mail::SpamAssassin::RegistryBoundaries - domain delegation rules
+
+=cut
+
+package Mail::SpamAssassin::RegistryBoundaries;
+
+use strict;
+use warnings;
+use bytes;
+use re 'taint';
+
+our @ISA = qw();
+use vars qw(%US_STATES);
+
+# called from SpamAssassin->init(); callers reach the object as $main->{registryboundaries}
+sub new {
+  my $class = shift;
+  $class = ref($class) || $class;  # accept an instance as well as a class name
+
+  my ($main) = @_;  # main object supplied by the caller; its {conf} is read below
+  my $self = {
+    'main'              => $main,
+    'conf'              => $main->{conf},
+  };
+  bless ($self, $class);
+
+  # Initialize valid_tlds_re for schemeless uri parsing, FreeMail etc
+  if ($self->{conf}->{valid_tlds} && %{$self->{conf}->{valid_tlds}}) {  # an empty hash ref is true, so test its contents too
+    my $tlds = join('|', keys %{$self->{conf}->{valid_tlds}});  # keys are interpolated into the pattern unescaped
+    # Perl 5.10+ trie optimizes lists, no need for fancy regex optimizing
+    $self->{valid_tlds_re} = qr/(?:$tlds)/i;
+  }
+  else {
+    # Failsafe in case no tlds defined, we don't want this to match everything..
+    $self->{valid_tlds_re} = qr/no_tlds_defined/;
+  }
+
+  $self;  # the blessed object is the return value
+}
+
+# Labels that split_domain checks when handling names under .us (the .us domain is nuts).
+foreach (qw/
+  ak al ar az ca co ct dc de fl ga gu hi ia id il in ks ky la ma md me mi
+  mn mo ms mt nc nd ne nh nj nm nv ny oh ok or pa pr ri sc sd tn tx ut va vi
+  vt wa wi wv wy
+  /) {
+  $US_STATES{$_} = 1;  # only the truth of the entry is tested by split_domain
+}
+
+###########################################################################
+
+=head1 METHODS
+
+=over 4
+
+=item ($hostname, $domain) = split_domain ($fqdn)
+
+Cut a fully-qualified hostname into the hostname part and the domain
+part, splitting at the DNS registry boundary.
+
+Examples:
+
+    "www.foo.com" => ( "www", "foo.com" )
+    "www.foo.co.uk" => ( "www", "foo.co.uk" )
+
+=cut
+
+sub split_domain {
+  my $self = shift;
+  my $domain = lc shift;  # lc(undef) is '', so $domain is always defined from here on
+
+  my $hostname = '';
+
+  if (defined $domain && $domain ne '') {
+    # www..spamassassin.org -> www.spamassassin.org
+    $domain =~ tr/././s;
+
+    # leading/trailing dots
+    $domain =~ s/^\.+//;
+    $domain =~ s/\.+$//;
+
+    # Split scalar domain into components
+    my @domparts = split(/\./, $domain);
+    my @hostname;
+
+    while (@domparts > 1) { # peel leading labels until a known suffix remains
+      if (@domparts == 4) {
+        if ($domparts[3] eq 'us' &&
+            (($domparts[0] eq 'pvt' && $domparts[1] eq 'k12') ||
+             ($domparts[0] =~ /^c[io]$/)))
+        {
+          # http://www.neustar.us/policies/docs/rfc_1480.txt
+          # "Fire-Dept.CI.Los-Angeles.CA.US"
+          # "<school-name>.PVT.K12.<state>.US"
+          last if ($US_STATES{$domparts[2]});
+        }
+      }
+      elsif (@domparts == 3) {
+        # http://www.neustar.us/policies/docs/rfc_1480.txt
+        # demon.co.uk
+        # esc.edu.ar
+        # [^\.]+\.${US_STATES}\.us
+        if ($domparts[2] eq 'us') {
+          last if ($US_STATES{$domparts[1]});
+        }
+        else {
+          my $temp = join(".", @domparts);
+          last if ($self->{conf}->{three_level_domains}{$temp});
+        }
+      }
+      elsif (@domparts == 2) {
+        # co.uk, etc.
+        my $temp = join(".", @domparts);
+        last if ($self->{conf}->{two_level_domains}{$temp});
+      }
+      push(@hostname, shift @domparts);  # leftmost label moves to the hostname side
+    }
+
+    # @domparts now holds the suffix at which the loop stopped, e.g. "com"
+    # or a configured two-/three-level entry such as "co.uk".  The domain
+    # part is that suffix plus one more label, so move the last label peeled
+    # off above back in front of it.  When nothing was peeled, @hostname is
+    # empty and the domain is left as it is.
+    unshift @domparts, pop @hostname if @hostname;
+    $domain = join(".", @domparts);
+    $hostname = join(".", @hostname);
+  }
+
+  ($hostname, $domain);  # list return: (hostname part, domain part)
+}
+
+###########################################################################
+
+=item $domain = trim_domain($fqdn)
+
+Cut a fully-qualified hostname into the hostname part and the domain
+part, returning just the domain.
+
+Examples:
+
+    "www.foo.com" => "foo.com"
+    "www.foo.co.uk" => "foo.co.uk"
+
+=cut
+
+sub trim_domain {
+  my $self = shift;
+  my $domain = shift;  # passed through unchanged; split_domain lower-cases it
+
+  my ($host, $dom) = $self->split_domain($domain);  # $host is not used here
+  return $dom;  # domain part only
+}
+
+###########################################################################
+
+=item $ok = is_domain_valid($dom)
+
+Return C<1> if the domain is valid, C<0> otherwise.  A valid domain
+(a) does not contain whitespace, (b) contains at least one dot, and (c)
+uses a valid TLD or ccTLD.
+
+=back
+
+=cut
+
+sub is_domain_valid {
+  my $self = shift;
+  my $dom = lc shift;  # compared in lower case against the valid_tlds keys
+
+  # domains don't have whitespace; every failure below returns 0, not undef
+  return 0 if ($dom =~ /\s/);
+
+  # ensure it ends in a known-valid TLD, and has at least 1 dot
+  return 0 unless ($dom =~ /\.([^.]+)$/);  # $1 is the final label
+  return 0 unless ($self->{conf}->{valid_tlds}{$1});
+
+  return 1;     # all checks passed
+}
+
+# Reduce a URI to its domain part; in list context also return the full host name.
+
+sub uri_to_domain {
+  my $self = shift;
+  my $uri = lc shift;
+
+  # Javascript is not going to help us, so return.
+  return if ($uri =~ /^javascript:/);
+
+  $uri =~ s{\#.*$}{}gs;			# drop fragment
+  $uri =~ s{^[a-z]+:/{0,2}}{}gs;	# drop the protocol
+  $uri =~ s{^[^/]*\@}{}gs;		# username/passwd
+
+  # strip path and CGI params.  note: bug 4213 shows that "&" should
+  # *not* be likewise stripped here -- it's permitted in hostnames by
+  # some common MUAs!
+  $uri =~ s{[/?].*$}{}gs;              # everything from the first "/" or "?" onwards
+
+  $uri =~ s{:\d*$}{}gs;		# port, bug 4191: sometimes the # is missing
+
+  # skip undecoded URIs if the encoded bits shouldn't be.
+  # we'll see the decoded version as well.  see url_encode()
+  return if $uri =~ /\%(?:2[1-9a-f]|[3-6][0-9a-f]|7[0-9a-e])/;
+
+  my $host = $uri;  # unstripped/full domain name
+
+  # keep IPs intact
+  if ($uri !~ /^\d+\.\d+\.\d+\.\d+$/) { # anything that is not a dotted-quad of digits
+    # get rid of hostname part of domain, understanding delegation
+    $uri = $self->trim_domain($uri);
+
+    # ignore invalid domains
+    return unless ($self->is_domain_valid($uri));
+  }
+  
+  # $uri is now the domain only (or the untouched dotted-quad), optionally return unstripped host name
+  return !wantarray ? $uri : ($uri, $host);
+}
+
+1;
+

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Mon Apr  6 17:52:12 2015
@@ -66,7 +66,6 @@ BEGIN {
 }
 
 use Mail::SpamAssassin;
-use Mail::SpamAssassin::Util::RegistrarBoundaries;
 
 use Config;
 use IO::Handle;
@@ -1204,6 +1203,10 @@ sub secure_tmpdir {
 
 ###########################################################################
 
+##
+## DEPRECATED FUNCTION, only left for third party plugins as fallback.
+## Replaced with Mail::SpamAssassin::RegistryBoundaries::uri_to_domain.
+##
 sub uri_to_domain {
   my ($uri) = @_;
 

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm Mon Apr  6 17:52:12 2015
@@ -22,6 +22,10 @@
 
 Mail::SpamAssassin::Util::RegistrarBoundaries - domain delegation rules
 
+DEPRECATED AND REPLACED WITH Mail::SpamAssassin::RegistryBoundaries !!
+
+DO NOT USE. This is left as fallback for third party plugins.
+
 =cut
 
 package Mail::SpamAssassin::Util::RegistrarBoundaries;
@@ -37,25 +41,18 @@ use vars qw (
 
 # %VALID_TLDS
 # The list of currently-valid TLDs for the DNS system.
-# Note for SA devs only: If required, when updating domain lists, also modify t/uri_text.t accordingly
-#
-# bash line to generate a formatted list of domains
-# Fetches domains, drops the top comment line, then joins domains with spaces in between
-#   wget http://data.iana.org/TLD/tlds-alpha-by-domain.txt -O - | tail -n+2 | perl -e 'chomp && s/$/ / && print lc while <>' && echo
-#
-# Remember to also change regexp below when updating!
-#
-# Last update: 2015-04-02-axb-Easter-Update
+# This list is deprecated and unmaintained. It will become increasingly
+# out of date and will be removed in a future release.
+# As of 3.4.2, updates will be done in rules/20_aux_tlds.cf
 foreach (qw/abbott abogado ac academy accountant accountants active actor ad ads adult ae aero af afl ag agency ai airforce al allfinanz alsace am amsterdam an android ao apartments aq aquarelle ar archi army arpa as asia associates at attorney au auction audio autos aw ax axa az ba band bank bar barclaycard barclays bargains bayern bb bbc bd be beer berlin best bf bg bh bi bid bike bingo bio biz bj black blackfriday bloomberg blue bm bmw bn bnpparibas bo boats bond boo boutique br brussels bs bt budapest build builders business buzz bv bw by bz bzh ca cab cal camera camp cancerresearch canon capetown capital caravan cards care career careers cartier casa cash casino cat catering cbn cc cd center ceo cern cf cfd cg ch channel chat cheap chloe christmas chrome church ci citic city ck cl claims cleaning click clinic clothing club cm cn co coach codes coffee college cologne com community company computer condos construction consulting contractors cooking cool coop country courses cr cr
 edit creditcard cricket crs cruises cu cuisinella cv cw cx cy cymru cz dabur dad dance date dating datsun day dclk de deals degree delivery democrat dental dentist desi design dev diamonds diet digital direct directory discount dj dk dm dnp do docs doha domains doosan download durban dvag dz eat ec edu education ee eg email emerck energy engineer engineering enterprises epson equipment er erni es esq estate et eu eurovision eus events everbank exchange expert exposed fail faith fan fans farm fashion feedback fi film finance financial firmdale fish fishing fit fitness fj fk flights florist flowers flsmidth fly fm fo foo football forex forsale foundation fr frl frogans fund furniture futbol ga gal gallery garden gb gbiz gd gdn ge gent gf gg ggee gh gi gift gifts gives gl glass gle global globo gm gmail gmo gmx gn gold goldpoint golf goo goog google gop gov gp gq gr graphics gratis green gripe gs gt gu guge guide guitars guru gw gy hamburg hangout haus healthcare help here hermes hipho
 p hiv hk hm hn holdings holiday homes horse host hosting house how hr ht hu ibm id ie ifm il im immo immobilien in industries infiniti info ing ink institute insure int international investments io iq ir irish is it iwc java jcb je jetzt jm jo jobs joburg jp juegos kaufen kddi ke kg kh ki kim kitchen kiwi km kn koeln komatsu kp kr krd kred kw ky kyoto kz la lacaixa land lat latrobe lawyer lb lc lds lease leclerc legal lgbt li lidl life lighting limited limo link lk loan loans london lotte lotto lr ls lt ltda lu luxe luxury lv ly ma madrid maif maison management mango market marketing markets marriott mc md me media meet melbourne meme memorial menu mg mh miami mil mini mk ml mm mma mn mo mobi moda moe monash money mormon mortgage moscow motorcycles mov movie mp mq mr ms mt mtn mtpc mu museum mv mw mx my mz na nagoya name navy nc ne net network neustar new news nexus nf ng ngo nhk ni nico ninja nissan nl no np nr nra nrw ntt nu nyc nz okinawa om one ong onl online ooo oracle org orga
 nic osaka otsuka ovh pa page panerai paris partners parts party pe pf pg ph pharmacy photo photography photos physio piaget pics pictet pictures pink pizza pk pl place plumbing plus pm pn pohl poker porn post pr praxi press pro prod productions prof properties property ps pt pub pw py qa qpon quebec re realtor recipes red redstone rehab reise reisen reit ren rentals repair report republican rest restaurant review reviews rich rio rip ro rocks rodeo rs rsvp ru ruhr rw ryukyu sa saarland sale samsung sap sarl saxo sb sc sca scb schmidt school schule schwarz science scot sd se services sew sexy sg sh shiksha shoes shriram si singles site sj sk sky sl sm sn so social software sohu solar solutions soy space spiegel spreadbetting sr st study style su sucks supplies supply support surf surgery suzuki sv sx sy sydney systems sz taipei tatar tattoo tax tc td tech technology tel temasek tennis tf tg th tickets tienda tips tires tirol tj tk tl tm tn to today tokyo tools top toshiba tours town 
 toys tr trade trading training travel trust tt tui tv tw tz ua ug uk university uno uol us uy uz va vacations vc ve vegas ventures versicherung vet vg vi viajes video villas vision vlaanderen vn vodka vote voting voto voyage vu wales wang watch webcam website wed wedding wf whoswho wien wiki williamhill win wme work works world ws wtc wtf xin xn--1qqw23a xn--30rr7y xn--3bst00m xn--3ds443g xn--3e0b707e xn--45brj9c xn--45q11c xn--4gbrim xn--55qw42g xn--55qx5d xn--6frz82g xn--6qq986b3xl xn--80adxhks xn--80ao21a xn--80asehdb xn--80aswg xn--90a3ac xn--90ais xn--9et52u xn--b4w605ferd xn--c1avg xn--cg4bki xn--clchc0ea0b2g2a9gcd xn--czr694b xn--czrs0t xn--czru2d xn--d1acj3b xn--d1alf xn--fiq228c5hs xn--fiq64b xn--fiqs8s xn--fiqz9s xn--flw351e xn--fpcrj9c3d xn--fzc2c9e2c xn--gecrj9c xn--h2brj9c xn--hxt814e xn--i1b6b1a6a2e xn--io0a7i xn--j1amh xn--j6w193g xn--kprw13d xn--kpry57d xn--kput3i xn--l1acc xn--lgbbat1ad8j xn--mgb9awbf xn--mgba3a4f16a xn--mgbaam7a8h xn--mgbab2bd xn--mgbayh7gpa xn--mg
 bbh1a71e xn--mgbc0a9azcg xn--mgberp4a5d4ar xn--mgbx4cd0ab xn--mxtq1m xn--ngbc5azd xn--node xn--nqv7f xn--nqv7fs00ema xn--o3cw4h xn--ogbpf8fl xn--p1acf xn--p1ai xn--pgbs0dh xn--q9jyb4c xn--qcka1pmc xn--rhqv96g xn--s9brj9c xn--ses554g xn--unup4y xn--vermgensberater-ctb xn--vermgensberatung-pwb xn--vhquv xn--vuq861b xn--wgbh1c xn--wgbl6a xn--xhq521b xn--xkc2al3hye2a xn--xkc2dl3a5ee0h xn--yfro4i67o xn--ygbi2ammx xn--zfr164b xxx xyz yachts yandex ye yodobashi yoga yokohama youtube yt za zip zm zone zuerich zw/) {
   $VALID_TLDS{$_} = 1;
 }
 
 # $VALID_TLDS_RE
 # %VALID_TLDS as Regexp::List optimized regexp, for use in Plugins etc
-# bash line to generate regex from TLD list
-# Fetches domains, drops the top commet line, builds a regex from the list of domains, then formats it to remove (?-xsim:) regex modifier flags
-#   wget http://data.iana.org/TLD/tlds-alpha-by-domain.txt -O - | tail -n+2 | perl -MRegexp::List -e '$/=undef; $_=<>; $r = Regexp::List->new; push @l, $_ for (split); print $r->list2re(@l)' | perl -pe 's/^\(\?[^:]*:(.*)\)$/$1/' && echo
-#
+# This regex is deprecated and unmaintained. It will become increasingly
+# out of date and will be removed in a future release.
+# As of 3.4.2, this regex is generated automatically in Conf.pm
 $VALID_TLDS_RE = qr/(?:X(?:N--(?:M(?:GB(?:A(?:(?:3A4F16|YH7GP)A|AM7A8H|B2BD)|ERP4A5D4AR|C0A9AZCG|BH1A71E|X4CD0AB|9AWBF)|XTQ1M)|F(?:IQ(?:(?:228C5H|S8|Z9)S|64B)|PCRJ9C3D|ZC2C9E2C|LW351E)|C(?:ZR(?:694B|S0T|U2D)|LCHC0EA0B2G2A9GCD|G4BKI|1AVG)|V(?:(?:ERMGENSBERAT(?:UNG-PW|ER-CT)|UQ861)B|HQUV)|X(?:KC2(?:DL3A5EE0H|AL3HYE2A)|HQ521B)|3(?:E0B707E|BST00M|DS443G|0RR7Y)|N(?:QV7F(?:S00EMA)?|GBC5AZD|ODE)|80A(?:S(?:EHDB|WG)|DXHKS|O21A)|(?:Q(?:CKA1PM|9JYB4)|GECRJ9)C|4(?:5(?:BRJ9|Q11)C|GBRIM)|KP(?:R(?:W13|Y57)D|UT3I)|9(?:0A(?:3AC|IS)|ET52U)|P(?:1A(?:CF|I)|GBS0DH)|Y(?:FRO4I67O|GBI2AMMX)|6(?:QQ986B3XL|FRZ82G)|I(?:1B6B1A6A2E|O0A7I)|L(?:GBBAT1AD8J|1ACC)|H(?:2BRJ9C|XT814E)|O(?:GBPF8FL|3CW4H)|S(?:9BRJ9C|ES554G)|J(?:6W193G|1AMH)|55Q(?:W42G|X5D)|D1A(?:CJ3B|LF)|WGB(?:H1C|L6A)|B4W605FERD|1QQW23A|RHQV96G|ZFR164B|UNUP4Y)|IN|XX|YZ)|C(?:[CDGKMVWXZ]|O(?:N(?:S(?:TRUCTION|ULTING)|(?:TRACTOR|DO)S)|M(?:P(?:UTER|ANY)|MUNITY)?|(?:L(?:LEG|OGN)|FFE)E|O(?:[LP]|KING)|U(?:NTRY|RSES)|ACH|DES)?|A(?:[BL]|R(?:E(?:ERS?)?|AVAN|TIER|
 DS)|N(?:CERRESEARCH|ON)|P(?:ETOWN|ITAL)|S(?:[AH]|INO)|T(?:ERING)?|M(?:ERA|P))?|H(?:R(?:ISTMAS|OME)|A(?:NNEL|T)|URCH|EAP|LOE)?|L(?:(?:EAN|OTH)ING|I(?:NIC|CK)|AIMS|UB)?|R(?:EDIT(?:CARD)?|(?:UISE)?S|ICKET)?|I(?:T(?:IC|Y))?|E(?:NTER|RN|O)|U(?:ISINELLA)?|Y(?:MRU)?|B?N|FD?)|S(?:[BDGJLMNRVXZ]|U(?:PP(?:L(?:IES|Y)|ORT)|R(?:GERY|F)|ZUKI|CKS)?|C(?:[AB]|H(?:MIDT|WARZ|OOL|ULE)|IENCE|OT)?|O(?:L(?:UTIONS|AR)|FTWARE|CIAL|HU|Y)?|A(?:ARLAND|MSUNG|LE|RL|XO|P)?|P(?:READBETTING|IEGEL|ACE)|H(?:IKSHA|RIRAM|OES)?|E(?:RVICES|XY|W)?|Y(?:STEMS|DNEY)?|I(?:NGLES|TE)?|T(?:UDY|YLE)?|KY?)|A(?:[OWZ]|C(?:T(?:IVE|OR)|COUNTANTS?|ADEMY)?|U(?:CTION|DIO|TOS)?|L(?:LFINANZ|SACE)?|S(?:SOCIATES|IA)?|B(?:OGADO|BOTT)|R(?:CHI|MY|PA)?|(?:MSTERDA)?M|Q(?:UARELLE)?|I(?:RFORCE)?|T(?:TORNEY)?|D(?:ULT|S)?|N(?:DROID)?|G(?:ENCY)?|PARTMENTS|E(?:RO)?|FL?|XA?)|M(?:[CDGHKLNPQRSVWXYZ]|O(?:R(?:TGAGE|MON)|N(?:ASH|EY)|TORCYCLES|V(?:IE)?|SCOW|BI|DA|E)?|A(?:R(?:KET(?:ING|S)?|RIOTT)|N(?:AGEMENT|GO)|I(?:SON|F)|DRID)?|E(?:M(?:ORIAL|E)|LBOURNE|DIA|ET
 |NU)?|I(?:(?:AM|N)I|L)|T(?:PC|N)?|U(?:SEUM)?|MA?)|B(?:[DFGHJSTVWY]|A(?:R(?:CLAY(?:CARD|S)|GAINS)?|N[DK]|YERN)?|U(?:ILD(?:ERS)?|DAPEST|SINESS|ZZ)|L(?:ACK(?:FRIDAY)?|OOMBERG|UE)|I(?:[DZ]|(?:NG)?O|KE)?|O(?:UTIQUE|ATS|ND|O)?|E(?:RLIN|ER|ST)?|N(?:PPARIBAS)?|R(?:USSELS)?|BC?|MW?|ZH?)|P(?:[EFGKMNSTWY]|R(?:O(?:D(?:UCTIONS)?|PERT(?:IES|Y)|F)?|AXI|ESS)?|A(?:R(?:T(?:(?:NER)?S|Y)|IS)|NERAI|GE)?|I(?:C(?:T(?:URES|ET)|S)|AGET|ZZA|NK)|H(?:OTO(?:GRAPHY|S)?|ARMACY|YSIO)?|L(?:U(?:MBING|S)|ACE)?|O(?:KER|HL|RN|ST)|UB)|G(?:[FHNPQSTWY]|O(?:[PV]|L(?:D(?:POINT)?|F)|O(?:G(?:LE)?)?)|R(?:A(?:PHIC|TI)S|EEN|IPE)?|U(?:I(?:TARS|DE)|GE|RU)?|L(?:OB(?:AL|O)|ASS|E)?|A(?:L(?:LERY)?|RDEN)?|I(?:FTS?|VES)?|M(?:[OX]|AIL)?|B(?:IZ)?|E(?:NT)?|G(?:EE)?|DN?)|F(?:[JM]|I(?:NANC(?:IAL|E)|SH(?:ING)?|T(?:NESS)?|RMDALE|LM)?|O(?:R(?:SALE|EX)|O(?:TBALL)?|UNDATION)?|L(?:O(?:RIST|WERS)|SMIDTH|IGHTS|Y)|A(?:I(?:TH|L)|SHION|NS?|RM)|U(?:RNITURE|TBOL|ND)|R(?:OGANS|L)?|(?:EEDBAC)?K)|D(?:[JMZ]|E(?:NT(?:IST|AL)|SI(?:GN)?|LIVERY|MOCRAT|GREE|ALS|V
 )?|I(?:(?:SCOUN|E)T|RECT(?:ORY)?|AMONDS|GITAL)|A(?:[DY]|T(?:ING|SUN|E)|BUR|NCE)|O(?:(?:MAIN|C)S|WNLOAD|OSAN|HA)?|(?:CL)?K|URBAN|VAG|NP)|T(?:[CDFGHJKLMNTVWZ]|O(?:(?:OL|UR|Y)S|SHIBA|DAY|KYO|WN|P)?|R(?:A(?:D(?:ING|E)|INING|VEL)|UST)?|I(?:(?:CKET|P)S|R(?:ES|OL)|ENDA)|E(?:CH(?:NOLOGY)?|MASEK|NNIS|L)|A(?:T(?:TOO|AR)|IPEI|X)|UI)|E(?:[CEG]|N(?:GINEER(?:ING)?|TERPRISES|ERGY)|X(?:P(?:OSED|ERT)|CHANGE)|U(?:ROVISION|S)?|(?:QUIPMEN|A)?T|VE(?:RBANK|NTS)|DU(?:CATION)?|M(?:ERCK|AIL)|S(?:TATE|Q)?|R(?:NI)?|PSON)|R(?:E(?:P(?:UBLICAN|AIR|ORT)|S(?:TAURAN)?T|D(?:STONE)?|I(?:SEN?|T)|N(?:TALS)?|VIEWS?|ALTOR|CIPES|HAB)?|O(?:CKS|DEO)?|I(?:[OP]|CH)|S(?:VP)?|U(?:HR)?|YUKYU|W)|L(?:[BCKRVY]|I(?:M(?:ITED|O)|GHTING|DL|FE|NK)?|A(?:T(?:ROBE)?|CAIXA|WYER|ND)?|O(?:TT[EO]|ANS?|NDON)|E(?:CLERC|ASE|GAL)|U(?:X(?:URY|E))?|T(?:DA)?|D?S|GBT)|I(?:[DELOQST]|N(?:[GK]|(?:VESTMENT|DUSTRIE)S|T(?:ERNATIONAL)?|S(?:TITUT|UR)E|F(?:INITI|O))?|M(?:MO(?:BILIEN)?)?|R(?:ISH)?|[BF]M|WC)|V(?:[CGU]|E(?:(?:NTURE|GA)S|RSICHERUNG|T)?|I(?:(?:AJE|
 LLA)S|SION|DEO)?|O(?:T(?:[EO]|ING)|YAGE|DKA)|(?:LAANDERE)?N|A(?:CATIONS)?)|H(?:[KMNRTU]|O(?:L(?:DINGS|IDAY)|ST(?:ING)?|[RU]SE|MES|W)|E(?:R(?:MES|E)|ALTHCARE|LP)|A(?:MBURG|NGOUT|US)|I(?:PHOP|V))|W(?:[FS]|E(?:B(?:SITE|CAM)|D(?:DING)?)|I(?:LLIAMHILL|E?N|KI)|A(?:LES|TCH|NG)|OR(?:KS?|LD)|HOSWHO|T[CF]|ME)|N(?:[FLOPUZ]|E(?:T(?:WORK)?|USTAR|WS?|XUS)?|I(?:SSAN|NJA|CO)?|A(?:GOYA|ME|VY)?|R[AW]?|GO?|Y?C|HK|TT)|K(?:[EGHMPWZ]|I(?:TCHEN|WI|M)?|O(?:MATSU|ELN)|(?:AUFE)?N|R(?:E?D)?|Y(?:OTO)?|DDI)|O(?:(?:(?:TSU|SA)K|KINAW)A|R(?:G(?:ANIC)?|ACLE)|N(?:[EG]|L(?:INE)?)|OO|VH|M)|Y(?:[ET]|O(?:(?:KOHAM|G)A|DOBASHI|UTUBE)|A(?:CHTS|NDEX))|J(?:[MP]|O(?:B(?:URG|S))?|E(?:TZT)?|UEGOS|AVA|CB)|U(?:[AGKSYZ]|N(?:IVERSITY|O)|OL)|Z(?:[AMW]|UERICH|ONE|IP)|Q(?:UEBEC|PON|A))/ix;
 
 # Two-Level TLDs
@@ -73,6 +70,10 @@ $VALID_TLDS_RE = qr/(?:X(?:N--(?:M(?:GB(
 # .ua : http://hostmaster.ua
 # .hu : http://www.domain.hu/domain/English/szabalyzat/sld.html
 #
+# This list is deprecated and unmaintained. It will become increasingly
+# out of date and will be removed in a future release.
+# As of 3.4.2, updates will be done in rules/20_aux_tlds.cf
+#
 foreach(qw/
 
   com.ac edu.ac gov.ac mil.ac net.ac org.ac
@@ -271,6 +272,10 @@ foreach (qw/
   $US_STATES{$_} = 1;
 }
 
+##
+## DO NOT UPDATE THIS DEPRECATED LIST
+## Everything is now maintained in rules/20_aux_tlds.cf (shipped via sa-update)
+##
 foreach (qw/
   demon.co.uk esc.edu.ar lkd.co.im plc.co.im
  /) {
@@ -293,6 +298,8 @@ Examples:
     "www.foo.com" => ( "www", "foo.com" )
     "www.foo.co.uk" => ( "www", "foo.co.uk" )
 
+This function is DEPRECATED. See L<Mail::SpamAssassin::RegistryBoundaries>.
+
 =cut
 
 sub split_domain {
@@ -369,6 +376,8 @@ Examples:
     "www.foo.com" => "foo.com"
     "www.foo.co.uk" => "foo.co.uk"
 
+This function is DEPRECATED. See L<Mail::SpamAssassin::RegistryBoundaries>.
+
 =cut
 
 sub trim_domain {
@@ -385,6 +394,8 @@ Return C<1> if the domain is valid, C<un
 (a) does not contain whitespace, (b) contains at least one dot, and (c)
 uses a valid TLD or ccTLD.
 
+This function is DEPRECATED. See L<Mail::SpamAssassin::RegistryBoundaries>.
+
 =back
 
 =cut

Modified: spamassassin/trunk/t/ip_addrs.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/ip_addrs.t?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/t/ip_addrs.t (original)
+++ spamassassin/trunk/t/ip_addrs.t Mon Apr  6 17:52:12 2015
@@ -140,7 +140,7 @@ ok tryone Mail::SpamAssassin::Constants:
 sub tsttrim ($$) {
   my $dom = shift;
   my $want = shift;
-  my $got = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain ($dom);
+  my $got = $sa->{registryboundaries}->trim_domain ($dom);
   if ($got eq $want) {
     return 1;
   } else {

Modified: spamassassin/trunk/t/uri.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/uri.t?rev=1671621&r1=1671620&r2=1671621&view=diff
==============================================================================
--- spamassassin/trunk/t/uri.t (original)
+++ spamassassin/trunk/t/uri.t Mon Apr  6 17:52:12 2015
@@ -64,7 +64,7 @@ ok (!$urimap{'CUMSLUTS..VIRGIN'});
 
 sub try_domains {
   my($try, $expect) = @_;
-  my $result = Mail::SpamAssassin::Util::uri_to_domain($try);
+  my $result = $sa->{registryboundaries}->uri_to_domain($try);
 
   # undef is valid in some situations, so deal with it...
   if (!defined $expect) {