You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by no...@apache.org on 2006/09/08 22:08:34 UTC

svn commit: r441631 - in /james/server/trunk/src: java/org/apache/james/smtpserver/core/filter/fastfail/ java/org/apache/james/util/urirbl/ test/org/apache/james/smtpserver/

Author: norman
Date: Fri Sep  8 13:08:33 2006
New Revision: 441631

URL: http://svn.apache.org/viewvc?view=rev&rev=441631
Log:
Add URIRBLHandler support based on a contribution. Thx to Mike Bryant. See JAMES-610

Added:
    james/server/trunk/src/java/org/apache/james/smtpserver/core/filter/fastfail/URIRBLHandler.java
    james/server/trunk/src/java/org/apache/james/util/urirbl/
    james/server/trunk/src/java/org/apache/james/util/urirbl/TLDLookup.java
    james/server/trunk/src/java/org/apache/james/util/urirbl/URIScanner.java
    james/server/trunk/src/test/org/apache/james/smtpserver/URIRBLHandlerTest.java

Added: james/server/trunk/src/java/org/apache/james/smtpserver/core/filter/fastfail/URIRBLHandler.java
URL: http://svn.apache.org/viewvc/james/server/trunk/src/java/org/apache/james/smtpserver/core/filter/fastfail/URIRBLHandler.java?view=auto&rev=441631
==============================================================================
--- james/server/trunk/src/java/org/apache/james/smtpserver/core/filter/fastfail/URIRBLHandler.java (added)
+++ james/server/trunk/src/java/org/apache/james/smtpserver/core/filter/fastfail/URIRBLHandler.java Fri Sep  8 13:08:33 2006
@@ -0,0 +1,256 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+
+
+
+package org.apache.james.smtpserver.core.filter.fastfail;
+
+import java.io.IOException;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+
+import javax.mail.MessagingException;
+import javax.mail.internet.MimeBodyPart;
+import javax.mail.internet.MimeMessage;
+import javax.mail.internet.MimeMultipart;
+import javax.mail.internet.MimePart;
+
+import org.apache.avalon.framework.configuration.Configurable;
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.logger.AbstractLogEnabled;
+import org.apache.avalon.framework.service.ServiceException;
+import org.apache.avalon.framework.service.ServiceManager;
+import org.apache.avalon.framework.service.Serviceable;
+import org.apache.james.services.DNSServer;
+import org.apache.james.smtpserver.MessageHandler;
+import org.apache.james.smtpserver.SMTPSession;
+import org.apache.james.util.mail.dsn.DSNStatus;
+import org.apache.james.util.urirbl.URIScanner;
+
+/**
+ * Extracts the domains referenced in a message body and rejects the message
+ * when one of them is listed by a configured URI RBL server.
+ */
+public class URIRBLHandler extends AbstractLogEnabled implements MessageHandler,
+    Serviceable, Configurable {
+
+    /** Resolver used for the RBL lookups. */
+    private DNSServer dnsServer;
+
+    /** Names of the URI RBL servers to query. */
+    private Collection uriRbl;
+
+    /** Whether the TXT record of a hit is used as the rejection text. */
+    private boolean getDetail = false;
+
+    /** Whether messages from sessions that may relay are scanned too. */
+    private boolean checkAuthNetworks = false;
+
+    /**
+     * @see org.apache.avalon.framework.service.Serviceable#service(ServiceManager)
+     */
+    public void service(ServiceManager serviceMan) throws ServiceException {
+        setDnsServer((DNSServer) serviceMan.lookup(DNSServer.ROLE));
+    }
+
+    /**
+     * Reads the list of URI RBL servers (mandatory) and the optional
+     * <code>getDetail</code> and <code>checkAuthNetworks</code> switches.
+     *
+     * @throws ConfigurationException if no server is configured
+     * @see org.apache.avalon.framework.configuration.Configurable#configure(Configuration)
+     */
+    public void configure(Configuration arg0) throws ConfigurationException {
+        ArrayList serverCollection = new ArrayList();
+        Configuration serverConfiguration = arg0.getChild("uriRblServers", false);
+        if (serverConfiguration != null) {
+            Configuration[] children = serverConfiguration.getChildren("server");
+            // A missing child array counts as empty so the check below still runs.
+            int childCount = (children == null) ? 0 : children.length;
+            for (int i = 0; i < childCount; i++) {
+                String rblServerName = children[i].getValue();
+                serverCollection.add(rblServerName);
+                if (getLogger().isInfoEnabled()) {
+                    getLogger().info("Adding uriRBL server: " + rblServerName);
+                }
+            }
+        }
+
+        if (serverCollection.isEmpty()) {
+            throw new ConfigurationException("Please provide at least one server");
+        }
+        setUriRblServer(serverCollection);
+
+        Configuration configuration = arg0.getChild("getDetail", false);
+        if (configuration != null) {
+            setGetDetail(configuration.getValueAsBoolean());
+        }
+
+        Configuration configRelay = arg0.getChild("checkAuthNetworks", false);
+        if (configRelay != null) {
+            setCheckAuthNetworks(configRelay.getValueAsBoolean(false));
+        }
+    }
+
+    /**
+     * Set the UriRBL Servers
+     *
+     * @param uriRbl The Collection holding the servers
+     */
+    public void setUriRblServer(Collection uriRbl) {
+        this.uriRbl = uriRbl;
+    }
+
+    /**
+     * Set to true if AuthNetworks should be included in the URI RBL check
+     *
+     * @param checkAuthNetworks
+     *            Set to true to enable
+     */
+    public void setCheckAuthNetworks(boolean checkAuthNetworks) {
+        this.checkAuthNetworks = checkAuthNetworks;
+    }
+
+    /**
+     * Set the DNSServer
+     *
+     * @param dnsServer
+     *            The DNSServer
+     */
+    public void setDnsServer(DNSServer dnsServer) {
+        this.dnsServer = dnsServer;
+    }
+
+    /**
+     * Set whether a TXT record should be looked up for a blocked domain.
+     *
+     * @param getDetail Set to true to enable
+     */
+    public void setGetDetail(boolean getDetail) {
+        this.getDetail = getDetail;
+    }
+
+    /**
+     * Looks up every domain found in the message on every configured URI RBL
+     * server and rejects the message on the first hit.
+     *
+     * @see org.apache.james.smtpserver.MessageHandler#onMessage(SMTPSession)
+     */
+    public void onMessage(SMTPSession session) {
+        // Not scan the message if relaying allowed
+        if (session.isRelayingAllowed() && !checkAuthNetworks) {
+            return;
+        }
+
+        try {
+            MimeMessage message = session.getMail().getMessage();
+            Iterator fDomains = scanMailForDomains(message).iterator();
+
+            while (fDomains.hasNext()) {
+                String target = fDomains.next().toString();
+
+                // Start a fresh iterator for each domain; a shared one is
+                // exhausted after the first domain and skips all the others.
+                Iterator uRbl = uriRbl.iterator();
+                while (uRbl.hasNext()) {
+                    String uRblServer = uRbl.next().toString();
+                    String query = target + "." + uRblServer;
+                    try {
+                        dnsServer.getByName(query);
+                    } catch (UnknownHostException uhe) {
+                        // domain not found. keep processing
+                        continue;
+                    }
+
+                    if (getLogger().isInfoEnabled()) {
+                        getLogger().info("Message restricted by " +  uRblServer + " to SMTP AUTH/postmaster/abuse.");
+                    }
+                    // Query the TXT record of the server that listed the
+                    // domain; the iterator must not be advanced again here.
+                    String detail = null;
+                    if (getDetail) {
+                        Collection txt = dnsServer.findTXTRecords(query);
+                        if (txt != null && !txt.isEmpty()) {
+                            detail = txt.iterator().next().toString();
+                        }
+                    }
+
+                    // One listing is enough: answer once and stop scanning.
+                    session.writeResponse(buildResponse(session, detail));
+                    session.abortMessage();
+                    session.setStopHandlerProcessing(true);
+                    return;
+                }
+            }
+        } catch (MessagingException e) {
+            getLogger().error(e.getMessage(), e);
+        } catch (IOException e) {
+            getLogger().error(e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Builds the SMTP reply for a listed domain; uses the TXT record text
+     * when <code>detail</code> is not null and the default text otherwise.
+     */
+    private String buildResponse(SMTPSession session, String detail) {
+        String status = "530 " + DSNStatus.getStatus(DSNStatus.PERMANENT, DSNStatus.SECURITY_AUTH);
+        if (detail != null) {
+            return status + " " + detail;
+        }
+        return status + " Rejected: unauthenticated e-mail from "
+            + session.getRemoteIPAddress()
+            + " is restricted.  Contact the postmaster for details.";
+    }
+
+    /**
+     * Recursively scans all MimeParts of an email for domain strings.
+     *
+     * @param part MimePart to scan
+     * @return the domain strings found in the part and its nested parts
+     */
+    private HashSet scanMailForDomains(MimePart part)
+        throws MessagingException, IOException {
+        HashSet domains = new HashSet();
+        getLogger().debug("mime type is: \"" + part.getContentType() + "\"");
+
+        if (part.isMimeType("text/plain") || part.isMimeType("text/html")) {
+            String content = part.getContent().toString();
+            getLogger().debug("scanning: \"" + content + "\"");
+            URIScanner.scanContentForDomains(domains, content);
+        } else if (part.isMimeType("multipart/*")) {
+            MimeMultipart multipart = (MimeMultipart) part.getContent();
+            int count = multipart.getCount();
+            getLogger().debug("multipart count is: " + count);
+
+            for (int index = 0; index < count; index++) {
+                getLogger().debug("recursing index: " + index);
+                MimeBodyPart mimeBodyPart = (MimeBodyPart) multipart.getBodyPart(index);
+                // Merge instead of assigning, which kept only the last part.
+                domains.addAll(scanMailForDomains(mimeBodyPart));
+            }
+        }
+        return domains;
+    }
+}

Added: james/server/trunk/src/java/org/apache/james/util/urirbl/TLDLookup.java
URL: http://svn.apache.org/viewvc/james/server/trunk/src/java/org/apache/james/util/urirbl/TLDLookup.java?view=auto&rev=441631
==============================================================================
--- james/server/trunk/src/java/org/apache/james/util/urirbl/TLDLookup.java (added)
+++ james/server/trunk/src/java/org/apache/james/util/urirbl/TLDLookup.java Fri Sep  8 13:08:33 2006
@@ -0,0 +1,1091 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+
+
+
+package org.apache.james.util.urirbl;
+
+import java.util.HashSet;
+
+/**
+ * A utility class that caches sets of multi-part top level domains (TLDs) for
+ * quick lookup.
+ */
+public class TLDLookup {
+    
+    /** Simple regular expression matching one dot-free part of a TLD in the
+        cache. Note: if the collection of known multi-part TLDs changes to
+        contain characters other than these, this string must be modified. */
+    static private final String tld = "[A-Za-z0-9\\-]*";
+    
+    /** Simple regular expression that matches a two-part TLD (part.part) */
+    static private final String tld2 = tld+"\\."+tld;
+    
+    /** Simple regular expression that matches a three-part TLD (part.part.part) */
+    static private final String tld3 = tld+"\\."+tld+"\\."+tld;
+    
+    /** Array of all known multi-level TLDs; the two sets below are filled from it */
+    static private final String[] multiPartTLDs = initMultiPartTLDs();
+    
+    /** A set of all known two-part TLDs, i.e. the array entries matching tld2 */
+    static private final HashSet twoPartTLDs = initTwoPartTLDs();
+    
+    /** A set of all known three-part TLDs, i.e. the array entries matching tld3 */
+    static private final HashSet threePartTLDs = initThreePartTLDs();
+
+    /** controls testing/debug output */
+    static private boolean testing = false;
+    
+    /**
+     * Tells whether the cache of known two-part TLDs holds the given domain.
+     *
+     * @param domain a String representing a two-part domain (xxx.xxx)
+     * @return true if the cache holds the domain string, false otherwise
+     */
+    static public boolean isTwoPartTLD(String domain) {
+        final boolean cached = twoPartTLDs.contains(domain);
+        return cached;
+    }
+    
+    /**
+     * Tells whether the cache of known three-part TLDs holds the given domain.
+     *
+     * @param domain a String representing a three-part domain (xxx.xxx.xxx)
+     * @return true if the cache holds the domain string, false otherwise
+     */
+    static public boolean isThreePartTLD(String domain) {
+        final boolean cached = threePartTLDs.contains(domain);
+        return cached;
+    }
+    
+    /**
+     * Builds the two-part top-level domain cache from the multi-part TLD array.
+     *
+     * @return a HashSet containing all known two-part TLDs
+     */
+    static private HashSet initTwoPartTLDs() {
+        HashSet cache = new HashSet(900);
+        for (int index = 0; index < multiPartTLDs.length; index++) {
+            String candidate = multiPartTLDs[index];
+            try {
+                if (candidate.matches("^"+tld2+"$")) {
+                    cache.add(candidate);
+                }
+            } catch (Exception ex) {
+                debugOut(ex);
+            }
+        }
+        debugOut("initTwoPartTLDs size="+cache.size());
+        return cache;
+    }
+
+    /**
+     * Builds the three-part top-level domain cache from the multi-part TLD array.
+     *
+     * @return a HashSet containing all known three-part TLDs
+     */
+    static private HashSet initThreePartTLDs() {
+        HashSet cache = new HashSet();
+        for (int index = 0; index < multiPartTLDs.length; index++) {
+            String candidate = multiPartTLDs[index];
+            try {
+                if (candidate.matches("^"+tld3+"$")) {
+                    debugOut("adding \"" + candidate + "\"");
+                    cache.add(candidate);
+                }
+            } catch (Exception ex) {
+                debugOut(ex);
+            }
+        }
+        debugOut("initThreePartTLDs size="+cache.size());
+        return cache;
+    }
+    
+    /**
+     * Initialize an array of Strings containing all known multi-part TLDs
+     *
+     * @return an array of all known multi-part TLDs
+     */
+    static private String[] initMultiPartTLDs() {
+        String[] tmp = new String[] {
+            "com.ac",
+            "edu.ac",
+            "gov.ac",
+            "edu.ai",
+            "gov.ai",
+            "com.ar",
+            "net.ar",
+            "org.ar",
+            "gov.ar",
+            "mil.ar",
+            "edu.ar",
+            "int.ar",
+            "co.at",
+            "ac.at",
+            "or.at",
+            "gv.at",
+            "priv.at",
+            "com.au",
+            "gov.au",
+            "org.au",
+            "edu.au",
+            "id.au",
+            "oz.au",
+            "info.au",
+            "net.au",
+            "asn.au",
+            "csiro.au",
+            "telememo.au",
+            "conf.au",
+            "otc.au",
+            "com.az",
+            "net.az",
+            "org.az",
+            "com.bb",
+            "net.bb",
+            "org.bb",
+            "ac.be",
+            "belgie.be",
+            "dns.be",
+            "fgov.be",
+            "com.bh",
+            "gov.bh",
+            "net.bh",
+            "edu.bh",
+            "org.bh",
+            "com.bm",
+            "edu.bm",
+            "gov.bm",
+            "org.bm",
+            "net.bm",
+            "adm.br",
+            "adv.br",
+            "agr.br",
+            "am.br",
+            "arq.br",
+            "art.br",
+            "ato.br",
+            "bio.br",
+            "bmd.br",
+            "cim.br",
+            "cng.br",
+            "cnt.br",
+            "com.br",
+            "coop.br",
+            "ecn.br",
+            "edu.br",
+            "eng.br",
+            "esp.br",
+            "etc.br",
+            "eti.br",
+            "far.br",
+            "fm.br",
+            "fnd.br",
+            "fot.br",
+            "fst.br",
+            "g12.br",
+            "ggf.br",
+            "gov.br",
+            "imb.br",
+            "ind.br",
+            "inf.br",
+            "jor.br",
+            "lel.br",
+            "mat.br",
+            "med.br",
+            "mil.br",
+            "mus.br",
+            "net.br",
+            "nom.br",
+            "not.br",
+            "ntr.br",
+            "odo.br",
+            "org.br",
+            "ppg.br",
+            "pro.br",
+            "psc.br",
+            "psi.br",
+            "qsl.br",
+            "rec.br",
+            "slg.br",
+            "srv.br",
+            "tmp.br",
+            "trd.br",
+            "tur.br",
+            "tv.br",
+            "vet.br",
+            "zlg.br",
+            "com.bs",
+            "net.bs",
+            "org.bs",
+            "ab.ca",
+            "bc.ca",
+            "mb.ca",
+            "nb.ca",
+            "nf.ca",
+            "nl.ca",
+            "ns.ca",
+            "nt.ca",
+            "nu.ca",
+            "on.ca",
+            "pe.ca",
+            "qc.ca",
+            "sk.ca",
+            "yk.ca",
+            "co.ck",
+            "net.ck",
+            "org.ck",
+            "edu.ck",
+            "gov.ck",
+            "com.cn",
+            "edu.cn",
+            "gov.cn",
+            "net.cn",
+            "org.cn",
+            "ac.cn",
+            "ah.cn",
+            "bj.cn",
+            "cq.cn",
+            "gd.cn",
+            "gs.cn",
+            "gx.cn",
+            "gz.cn",
+            "hb.cn",
+            "he.cn",
+            "hi.cn",
+            "hk.cn",
+            "hl.cn",
+            "hn.cn",
+            "jl.cn",
+            "js.cn",
+            "ln.cn",
+            "mo.cn",
+            "nm.cn",
+            "nx.cn",
+            "qh.cn",
+            "sc.cn",
+            "sn.cn",
+            "sh.cn",
+            "sx.cn",
+            "tj.cn",
+            "tw.cn",
+            "xj.cn",
+            "xz.cn",
+            "yn.cn",
+            "zj.cn",
+            "arts.co",
+            "com.co",
+            "edu.co",
+            "firm.co",
+            "gov.co",
+            "info.co",
+            "int.co",
+            "nom.co",
+            "mil.co",
+            "org.co",
+            "rec.co",
+            "store.co",
+            "web.co",
+            "ac.cr",
+            "co.cr",
+            "ed.cr",
+            "fi.cr",
+            "go.cr",
+            "or.cr",
+            "sa.cr",
+            "com.cu",
+            "net.cu",
+            "org.cu",
+            "ac.cy",
+            "com.cy",
+            "gov.cy",
+            "net.cy",
+            "org.cy",
+            "co.dk",
+            "art.do",
+            "com.do",
+            "edu.do",
+            "gov.do",
+            "org.do",
+            "mil.do",
+            "net.do",
+            "web.do",
+            "com.dz",
+            "org.dz",
+            "net.dz",
+            "gov.dz",
+            "edu.dz",
+            "ass.dz",
+            "pol.dz",
+            "art.dz",
+            "com.ec",
+            "k12.ec",
+            "edu.ec",
+            "fin.ec",
+            "med.ec",
+            "gov.ec",
+            "mil.ec",
+            "org.ec",
+            "net.ec",
+            "com.eg",
+            "edu.eg",
+            "eun.eg",
+            "gov.eg",
+            "net.eg",
+            "org.eg",
+            "sci.eg",
+            "com.er",
+            "net.er",
+            "org.er",
+            "edu.er",
+            "mil.er",
+            "gov.er",
+            "ind.er",
+            "com.et",
+            "gov.et",
+            "org.et",
+            "edu.et",
+            "net.et",
+            "biz.et",
+            "name.et",
+            "info.et",
+            "ac.fj",
+            "com.fj",
+            "gov.fj",
+            "id.fj",
+            "org.fj",
+            "school.fj",
+            "com.fk",
+            "ac.fk",
+            "gov.fk",
+            "net.fk",
+            "nom.fk",
+            "org.fk",
+            "asso.fr",
+            "nom.fr",
+            "barreau.fr",
+            "com.fr",
+            "prd.fr",
+            "presse.fr",
+            "tm.fr",
+            "aeroport.fr",
+            "assedic.fr",
+            "avocat.fr",
+            "avoues.fr",
+            "cci.fr",
+            "chambagri.fr",
+            "chirurgiens-dentistes.fr",
+            "experts-comptables.fr",
+            "geometre-expert.fr",
+            "gouv.fr",
+            "greta.fr",
+            "huissier-justice.fr",
+            "medecin.fr",
+            "notaires.fr",
+            "pharmacien.fr",
+            "port.fr",
+            "veterinaire.fr",
+            "com.ge",
+            "edu.ge",
+            "gov.ge",
+            "mil.ge",
+            "net.ge",
+            "org.ge",
+            "pvt.ge",
+            "co.gg",
+            "org.gg",
+            "sch.gg",
+            "ac.gg",
+            "gov.gg",
+            "ltd.gg",
+            "ind.gg",
+            "net.gg",
+            "alderney.gg",
+            "guernsey.gg",
+            "sark.gg",
+            "com.gu",
+            "edu.gu",
+            "net.gu",
+            "org.gu",
+            "gov.gu",
+            "mil.gu",
+            "com.hk",
+            "net.hk",
+            "org.hk",
+            "idv.hk",
+            "gov.hk",
+            "edu.hk",
+            "co.hu",
+            "2000.hu",
+            "erotika.hu",
+            "jogasz.hu",
+            "sex.hu",
+            "video.hu",
+            "info.hu",
+            "agrar.hu",
+            "film.hu",
+            "konyvelo.hu",
+            "shop.hu",
+            "org.hu",
+            "bolt.hu",
+            "forum.hu",
+            "lakas.hu",
+            "suli.hu",
+            "priv.hu",
+            "casino.hu",
+            "games.hu",
+            "media.hu",
+            "szex.hu",
+            "sport.hu",
+            "city.hu",
+            "hotel.hu",
+            "news.hu",
+            "tozsde.hu",
+            "tm.hu",
+            "erotica.hu",
+            "ingatlan.hu",
+            "reklam.hu",
+            "utazas.hu",
+            "ac.id",
+            "co.id",
+            "go.id",
+            "mil.id",
+            "net.id",
+            "or.id",
+            "co.il",
+            "net.il",
+            "org.il",
+            "ac.il",
+            "gov.il",
+            "k12.il",
+            "muni.il",
+            "idf.il",
+            "co.im",
+            "net.im",
+            "org.im",
+            "ac.im",
+            "lkd.co.im",
+            "gov.im",
+            "nic.im",
+            "plc.co.im",
+            "co.in",
+            "net.in",
+            "ac.in",
+            "ernet.in",
+            "gov.in",
+            "nic.in",
+            "res.in",
+            "gen.in",
+            "firm.in",
+            "mil.in",
+            "org.in",
+            "ind.in",
+            "ac.je",
+            "co.je",
+            "net.je",
+            "org.je",
+            "gov.je",
+            "ind.je",
+            "jersey.je",
+            "ltd.je",
+            "sch.je",
+            "com.jo",
+            "org.jo",
+            "net.jo",
+            "gov.jo",
+            "edu.jo",
+            "mil.jo",
+            "ad.jp",
+            "ac.jp",
+            "co.jp",
+            "go.jp",
+            "or.jp",
+            "ne.jp",
+            "gr.jp",
+            "ed.jp",
+            "lg.jp",
+            "net.jp",
+            "org.jp",
+            "gov.jp",
+            "hokkaido.jp",
+            "aomori.jp",
+            "iwate.jp",
+            "miyagi.jp",
+            "akita.jp",
+            "yamagata.jp",
+            "fukushima.jp",
+            "ibaraki.jp",
+            "tochigi.jp",
+            "gunma.jp",
+            "saitama.jp",
+            "chiba.jp",
+            "tokyo.jp",
+            "kanagawa.jp",
+            "niigata.jp",
+            "toyama.jp",
+            "ishikawa.jp",
+            "fukui.jp",
+            "yamanashi.jp",
+            "nagano.jp",
+            "gifu.jp",
+            "shizuoka.jp",
+            "aichi.jp",
+            "mie.jp",
+            "shiga.jp",
+            "kyoto.jp",
+            "osaka.jp",
+            "hyogo.jp",
+            "nara.jp",
+            "wakayama.jp",
+            "tottori.jp",
+            "shimane.jp",
+            "okayama.jp",
+            "hiroshima.jp",
+            "yamaguchi.jp",
+            "tokushima.jp",
+            "kagawa.jp",
+            "ehime.jp",
+            "kochi.jp",
+            "fukuoka.jp",
+            "saga.jp",
+            "nagasaki.jp",
+            "kumamoto.jp",
+            "oita.jp",
+            "miyazaki.jp",
+            "kagoshima.jp",
+            "okinawa.jp",
+            "sapporo.jp",
+            "sendai.jp",
+            "yokohama.jp",
+            "kawasaki.jp",
+            "nagoya.jp",
+            "kobe.jp",
+            "kitakyushu.jp",
+            "utsunomiya.jp",
+            "kanazawa.jp",
+            "takamatsu.jp",
+            "matsuyama.jp",
+            "com.kh",
+            "net.kh",
+            "org.kh",
+            "per.kh",
+            "edu.kh",
+            "gov.kh",
+            "mil.kh",
+            "ac.kr",
+            "co.kr",
+            "go.kr",
+            "ne.kr",
+            "or.kr",
+            "pe.kr",
+            "re.kr",
+            "seoul.kr",
+            "kyonggi.kr",
+            "com.kw",
+            "net.kw",
+            "org.kw",
+            "edu.kw",
+            "gov.kw",
+            "com.la",
+            "net.la",
+            "org.la",
+            "com.lb",
+            "org.lb",
+            "net.lb",
+            "edu.lb",
+            "gov.lb",
+            "mil.lb",
+            "com.lc",
+            "edu.lc",
+            "gov.lc",
+            "net.lc",
+            "org.lc",
+            "com.lv",
+            "net.lv",
+            "org.lv",
+            "edu.lv",
+            "gov.lv",
+            "mil.lv",
+            "id.lv",
+            "asn.lv",
+            "conf.lv",
+            "com.ly",
+            "net.ly",
+            "org.ly",
+            "co.ma",
+            "net.ma",
+            "org.ma",
+            "press.ma",
+            "ac.ma",
+            "com.mk",
+            "com.mm",
+            "net.mm",
+            "org.mm",
+            "edu.mm",
+            "gov.mm",
+            "com.mo",
+            "net.mo",
+            "org.mo",
+            "edu.mo",
+            "gov.mo",
+            "com.mt",
+            "net.mt",
+            "org.mt",
+            "edu.mt",
+            "tm.mt",
+            "uu.mt",
+            "com.mx",
+            "net.mx",
+            "org.mx",
+            "com.my",
+            "org.my",
+            "gov.my",
+            "edu.my",
+            "net.my",
+            "com.na",
+            "org.na",
+            "net.na",
+            "alt.na",
+            "edu.na",
+            "cul.na",
+            "unam.na",
+            "telecom.na",
+            "com.nc",
+            "net.nc",
+            "org.nc",
+            "ac.ng",
+            "edu.ng",
+            "sch.ng",
+            "com.ng",
+            "gov.ng",
+            "org.ng",
+            "net.ng",
+            "gob.ni",
+            "com.ni",
+            "net.ni",
+            "edu.ni",
+            "nom.ni",
+            "org.ni",
+            "com.np",
+            "net.np",
+            "org.np",
+            "gov.np",
+            "edu.np",
+            "ac.nz",
+            "co.nz",
+            "cri.nz",
+            "gen.nz",
+            "geek.nz",
+            "govt.nz",
+            "iwi.nz",
+            "maori.nz",
+            "mil.nz",
+            "net.nz",
+            "org.nz",
+            "school.nz",
+            "com.om",
+            "co.om",
+            "edu.om",
+            "ac.om",
+            "gov.om",
+            "net.om",
+            "org.om",
+            "mod.om",
+            "museum.om",
+            "biz.om",
+            "pro.om",
+            "med.om",
+            "com.pa",
+            "net.pa",
+            "org.pa",
+            "edu.pa",
+            "ac.pa",
+            "gob.pa",
+            "sld.pa",
+            "edu.pe",
+            "gob.pe",
+            "nom.pe",
+            "mil.pe",
+            "org.pe",
+            "com.pe",
+            "net.pe",
+            "com.pg",
+            "net.pg",
+            "ac.pg",
+            "com.ph",
+            "net.ph",
+            "org.ph",
+            "mil.ph",
+            "ngo.ph",
+            "aid.pl",
+            "agro.pl",
+            "atm.pl",
+            "auto.pl",
+            "biz.pl",
+            "com.pl",
+            "edu.pl",
+            "gmina.pl",
+            "gsm.pl",
+            "info.pl",
+            "mail.pl",
+            "miasta.pl",
+            "media.pl",
+            "mil.pl",
+            "net.pl",
+            "nieruchomosci.pl",
+            "nom.pl",
+            "org.pl",
+            "pc.pl",
+            "powiat.pl",
+            "priv.pl",
+            "realestate.pl",
+            "rel.pl",
+            "sex.pl",
+            "shop.pl",
+            "sklep.pl",
+            "sos.pl",
+            "szkola.pl",
+            "targi.pl",
+            "tm.pl",
+            "tourism.pl",
+            "travel.pl",
+            "turystyka.pl",
+            "com.pk",
+            "net.pk",
+            "edu.pk",
+            "org.pk",
+            "fam.pk",
+            "biz.pk",
+            "web.pk",
+            "gov.pk",
+            "gob.pk",
+            "gok.pk",
+            "gon.pk",
+            "gop.pk",
+            "gos.pk",
+            "edu.ps",
+            "gov.ps",
+            "plo.ps",
+            "sec.ps",
+            "com.py",
+            "net.py",
+            "org.py",
+            "edu.py",
+            "com.qa",
+            "net.qa",
+            "org.qa",
+            "edu.qa",
+            "gov.qa",
+            "asso.re",
+            "com.re",
+            "nom.re",
+            "com.ru",
+            "net.ru",
+            "org.ru",
+            "pp.ru",
+            "com.sa",
+            "edu.sa",
+            "sch.sa",
+            "med.sa",
+            "gov.sa",
+            "net.sa",
+            "org.sa",
+            "pub.sa",
+            "com.sb",
+            "net.sb",
+            "org.sb",
+            "edu.sb",
+            "gov.sb",
+            "com.sd",
+            "net.sd",
+            "org.sd",
+            "edu.sd",
+            "sch.sd",
+            "med.sd",
+            "gov.sd",
+            "tm.se",
+            "press.se",
+            "parti.se",
+            "brand.se",
+            "fh.se",
+            "fhsk.se",
+            "fhv.se",
+            "komforb.se",
+            "kommunalforbund.se",
+            "komvux.se",
+            "lanarb.se",
+            "lanbib.se",
+            "naturbruksgymn.se",
+            "sshn.se",
+            "org.se",
+            "pp.se",
+            "com.sg",
+            "net.sg",
+            "org.sg",
+            "edu.sg",
+            "gov.sg",
+            "per.sg",
+            "com.sh",
+            "net.sh",
+            "org.sh",
+            "edu.sh",
+            "gov.sh",
+            "mil.sh",
+            "gov.st",
+            "saotome.st",
+            "principe.st",
+            "consulado.st",
+            "embaixada.st",
+            "org.st",
+            "edu.st",
+            "net.st",
+            "com.st",
+            "store.st",
+            "mil.st",
+            "co.st",
+            "com.sv",
+            "org.sv",
+            "edu.sv",
+            "gob.sv",
+            "red.sv",
+            "com.sy",
+            "net.sy",
+            "org.sy",
+            "gov.sy",
+            "ac.th",
+            "co.th",
+            "go.th",
+            "net.th",
+            "or.th",
+            "com.tn",
+            "net.tn",
+            "org.tn",
+            "edunet.tn",
+            "gov.tn",
+            "ens.tn",
+            "fin.tn",
+            "nat.tn",
+            "ind.tn",
+            "info.tn",
+            "intl.tn",
+            "rnrt.tn",
+            "rnu.tn",
+            "rns.tn",
+            "tourism.tn",
+            "com.tr",
+            "net.tr",
+            "org.tr",
+            "edu.tr",
+            "gov.tr",
+            "mil.tr",
+            "bbs.tr",
+            "k12.tr",
+            "gen.tr",
+            "co.tt",
+            "com.tt",
+            "org.tt",
+            "net.tt",
+            "biz.tt",
+            "info.tt",
+            "pro.tt",
+            "name.tt",
+            "gov.tt",
+            "edu.tt",
+            "nic.tt",
+            "us.tt",
+            "uk.tt",
+            "ca.tt",
+            "eu.tt",
+            "es.tt",
+            "fr.tt",
+            "it.tt",
+            "se.tt",
+            "dk.tt",
+            "be.tt",
+            "de.tt",
+            "at.tt",
+            "au.tt",
+            "co.tv",
+            "com.tw",
+            "net.tw",
+            "org.tw",
+            "edu.tw",
+            "idv.tw",
+            "gove.tw",
+            "com.ua",
+            "net.ua",
+            "org.ua",
+            "edu.ua",
+            "gov.ua",
+            "ac.ug",
+            "co.ug",
+            "or.ug",
+            "go.ug",
+            "co.uk",
+            "me.uk",
+            "org.uk",
+            "edu.uk",
+            "ltd.uk",
+            "plc.uk",
+            "net.uk",
+            "sch.uk",
+            "nic.uk",
+            "ac.uk",
+            "gov.uk",
+            "nhs.uk",
+            "police.uk",
+            "mod.uk",
+            "dni.us",
+            "fed.us",
+            "com.uy",
+            "edu.uy",
+            "net.uy",
+            "org.uy",
+            "gub.uy",
+            "mil.uy",
+            "com.ve",
+            "net.ve",
+            "org.ve",
+            "co.ve",
+            "edu.ve",
+            "gov.ve",
+            "mil.ve",
+            "arts.ve",
+            "bib.ve",
+            "firm.ve",
+            "info.ve",
+            "int.ve",
+            "nom.ve",
+            "rec.ve",
+            "store.ve",
+            "tec.ve",
+            "web.ve",
+            "co.vi",
+            "net.vi",
+            "org.vi",
+            "com.vn",
+            "biz.vn",
+            "edu.vn",
+            "gov.vn",
+            "net.vn",
+            "org.vn",
+            "int.vn",
+            "ac.vn",
+            "pro.vn",
+            "info.vn",
+            "health.vn",
+            "name.vn",
+            "com.vu",
+            "edu.vu",
+            "net.vu",
+            "org.vu",
+            "de.vu",
+            "ch.vu",
+            "fr.vu",
+            "com.ws",
+            "net.ws",
+            "org.ws",
+            "gov.ws",
+            "edu.ws",
+            "ac.yu",
+            "co.yu",
+            "edu.yu",
+            "org.yu",
+            "com.ye",
+            "net.ye",
+            "org.ye",
+            "gov.ye",
+            "edu.ye",
+            "mil.ye",
+            "ac.za",
+            "alt.za",
+            "bourse.za",
+            "city.za",
+            "co.za",
+            "edu.za",
+            "gov.za",
+            "law.za",
+            "mil.za",
+            "net.za",
+            "ngo.za",
+            "nom.za",
+            "org.za",
+            "school.za",
+            "tm.za",
+            "web.za",
+            "co.zw",
+            "ac.zw",
+            "org.zw",
+            "gov.zw",
+            "eu.org",
+            "au.com",
+            "br.com",
+            "cn.com",
+            "de.com",
+            "de.net",
+            "eu.com",
+            "gb.com",
+            "gb.net",
+            "hu.com",
+            "no.com",
+            "qc.com",
+            "ru.com",
+            "sa.com",
+            "se.com",
+            "uk.com",
+            "uk.net",
+            "us.com",
+            "uy.com",
+            "za.com",
+            "dk.org",
+            "tel.no",
+            "fax.nr",
+            "mob.nr",
+            "mobil.nr",
+            "mobile.nr",
+            "tel.nr",
+            "tlf.nr",
+            "e164.arpa"
+        };
+        debugOut("array size=" + tmp.length);
+        return tmp;
+    }
+    
+    /**
+     * Debugging output
+     */
+    private static void debugOut(String msg) {
+        // Stay silent unless the class-level testing switch is turned on.
+        if (!testing) {
+            return;
+        }
+        System.out.println(msg);
+    }
+    
+    /**
+     * Debugging output
+     */
+    private static void debugOut(Throwable th) {
+        // Stay silent unless the class-level testing switch is turned on.
+        if (!testing) {
+            return;
+        }
+        System.out.println(th);
+    }
+}
+
+
+

Added: james/server/trunk/src/java/org/apache/james/util/urirbl/URIScanner.java
URL: http://svn.apache.org/viewvc/james/server/trunk/src/java/org/apache/james/util/urirbl/URIScanner.java?view=auto&rev=441631
==============================================================================
--- james/server/trunk/src/java/org/apache/james/util/urirbl/URIScanner.java (added)
+++ james/server/trunk/src/java/org/apache/james/util/urirbl/URIScanner.java Fri Sep  8 13:08:33 2006
@@ -0,0 +1,378 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+
+
+
+package org.apache.james.util.urirbl;
+
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.regex.*;
+import java.net.URI;
+
+/**
+ * Static helpers that scan text for URIs and e-mail addresses, extract the
+ * host of each one and reduce those hosts to their registrar-level domains
+ * (or, for IP addresses, to the octets in reverse order).
+ */
+public class URIScanner {
+
+    /* These regular expressions "inspired" by Spamassassin */
+    static private final String reserved = ";/?:@&=+$,[]\\#|";
+
+    static private final String reservedNoColon = ";/?@&=+$,[]\\#|";
+
+    static private final String mark = "-_.!~*'()";
+
+    static private final String unreserved = "A-Za-z0-9" + escape(mark)
+            + "\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f";
+
+    static private final String uricSet = escape(reserved) + unreserved + "%";
+
+    static private final String uricNoColon = escape(reservedNoColon)
+            + unreserved + "%";
+
+    static private final String schemeRE = "(?-xism:(?:https?|ftp|mailto|javascript|file))";
+
+    static private final String schemelessRE = "(?-xism:(?<![.=])(?:(?i)www\\d*\\.|(?i)ftp\\.))";
+
+    static private final String uriRE = "(?-xism:\\b(?:" + schemeRE + ":["
+            + uricNoColon + "]|" + schemelessRE + ")[" + uricSet + "#]*)";
+
+    /** Pre-compiled pattern that matches URIs */
+    static private final Pattern uriPattern = Pattern.compile(uriRE);
+
+    /** Pre-compiled pattern that matches URI scheme strings */
+    static private final Pattern schemePattern = Pattern.compile("^" + schemeRE
+            + ":");
+
+    /** Pre-compiled pattern that unwraps a URI enclosed in angle brackets */
+    static private final Pattern uriCleanup = Pattern.compile("^<(.*)>$");
+
+    /** Pre-compiled pattern that matches one trailing ']', ')', '>' or '#' */
+    static private final Pattern uriCleanup2 = Pattern.compile("[\\]\\)>#]$");
+
+    /** Pre-compiled pattern for identifying "mailto:" URIs that are not
+     already followed by slashes */
+    static private final Pattern uriCleanup3 = Pattern
+            .compile("^(?i)mailto:([^\\/]{2})(.*)$");
+
+    /** Pre-compiled pattern that recognises a schemeless "www" host; compiled
+     once here instead of on every loop iteration via String.matches() */
+    static private final Pattern wwwPrefixPattern = Pattern
+            .compile("^(?i)www\\d*\\..*");
+
+    /** Pre-compiled pattern that recognises a schemeless "ftp" host; compiled
+     once here instead of on every loop iteration via String.matches() */
+    static private final Pattern ftpPrefixPattern = Pattern
+            .compile("^(?i)ftp\\..*");
+
+    /* These regular expressions also "inspired" by Spamassassin */
+    static private final String esc = "\\\\";
+
+    static private final String period = "\\.";
+
+    static private final String space = "\\040";
+
+    static private final String open_br = "\\[";
+
+    static private final String close_br = "\\]";
+
+    static private final String nonASCII = "\\x80-\\xff";
+
+    static private final String ctrl = "\\000-\\037";
+
+    static private final String cr_list = "\\n\\015";
+
+    static private final String qtext = "[^" + esc + nonASCII + cr_list + "\"]";
+
+    static private final String dtext = "[^" + esc + nonASCII + cr_list
+            + open_br + close_br + "]";
+
+    static private final String quoted_pair = esc + "[^" + nonASCII + "]";
+
+    static private final String atom_char = "[^(" + space + ")<>@,;:\"." + esc
+            + open_br + close_br + ctrl + nonASCII + "]";
+
+    static private final String atom = "(?>" + atom_char + "+)";
+
+    static private final String quoted_str = "\"" + qtext + "*(?:"
+            + quoted_pair + qtext + "*)*\"";
+
+    static private final String word = "(?:" + atom + "|" + quoted_str + ")";
+
+    static private final String local_part = word + "(?:" + period + word
+            + ")*";
+
+    static private final String label = "[A-Za-z\\d](?:[A-Za-z\\d-]*[A-Za-z\\d])?";
+
+    static private final String domain_ref = label + "(?:" + period + label
+            + ")*";
+
+    static private final String domain_lit = open_br + "(?:" + dtext + "|"
+            + quoted_pair + ")*" + close_br;
+
+    static private final String domain = "(?:" + domain_ref + "|" + domain_lit
+            + ")";
+
+    static private final String Addr_spec_re = "(?-xism:" + local_part
+            + "\\s*\\@\\s*" + domain + ")";
+
+    /** Pre-compiled pattern for matching "schemeless" mailto strings */
+    static private final Pattern emailAddrPattern = Pattern
+            .compile(Addr_spec_re);
+
+    /** Simple regular expression to match an octet part of an IP address.
+     It is a bare alternation, so it must always be wrapped in a group (as
+     ipCap does) before being combined with anything else. */
+    static private final String octet = "(?:[1-2][0-9][0-9])|(?:[1-9][0-9])|(?:[0-9])";
+
+    /** Simple regular expression to match a part of a domain string in the
+     TLDLookup cache. */
+    static private final String tld = "[A-Za-z0-9\\-]*";
+
+    /** Simple regular expression that matches a two-part TLD */
+    static private final String tld2 = tld + "\\." + tld;
+
+    /** Simple regular expression that matches a three-part TLD */
+    static private final String tld3 = tld + "\\." + tld + "\\." + tld;
+
+    /** Regular expression that matches and captures parts of a possible
+     one-part TLD domain string: group 1 is "label.tld", group 2 the TLD */
+    static private final String tldCap = "(" + tld + "\\.(" + tld + "))$";
+
+    /** Regular expression that matches and captures parts of a possible
+     two-part TLD domain string: group 1 is "label.tld", group 2 the TLD */
+    static private final String tld2Cap = "(" + tld + "\\.(" + tld2 + "))$";
+
+    /** Regular expression that matches and captures parts of a possible
+     three-part TLD domain string: group 1 is "label.tld", group 2 the TLD */
+    static private final String tld3Cap = "(" + tld + "\\.(" + tld3 + "))$";
+
+    /** Regular expression that matches and captures parts of an IP address:
+     group 1 is the whole address, groups 2 to 5 the individual octets */
+    static private final String ipCap = "((" + octet + ")\\.(" + octet
+            + ")\\.(" + octet + ")\\.(" + octet + "))$";
+
+    /** Pre-compiled pattern that matches IP addresses */
+    static private final Pattern ipCapPattern = Pattern.compile(ipCap);
+
+    /** Pre-compiled pattern that matches domain string that is possibly
+     contained in a one-part TLD */
+    static private final Pattern tldCapPattern = Pattern.compile(tldCap);
+
+    /** Pre-compiled pattern that matches domain string that is possibly
+     contained in a two-part TLD */
+    static private final Pattern tld2CapPattern = Pattern.compile(tld2Cap);
+
+    /** Pre-compiled pattern that matches domain string that is possibly
+     contained in a three-part TLD */
+    static private final Pattern tld3CapPattern = Pattern.compile(tld3Cap);
+
+    /** controls testing/debug output; nothing in this class switches it on */
+    static private boolean testing = false;
+
+    /**
+     * Scans a character sequence for URIs. Then add all unique domain strings
+     * derived from those found URIs to the supplied HashSet.
+     * <p>
+     * This function calls scanContentForHosts() to grab all the host strings.
+     * Then it calls domainFromHost() on each host string found to distill them
+     * to their basic "registrar" domains. Hosts for which no domain can be
+     * derived are skipped.
+     *
+     * @param domains a HashSet to be populated with all domain strings found in
+     *        the content
+     * @param content a character sequence to be scanned for URIs
+     */
+    static public void scanContentForDomains(HashSet domains,
+            CharSequence content) {
+        HashSet hosts = scanContentForHosts(content);
+        for (Iterator i = hosts.iterator(); i.hasNext();) {
+            String domain = domainFromHost((String) i.next());
+            if (null != domain) {
+                // a Set silently ignores duplicates, no contains() needed
+                domains.add(domain);
+            }
+        }
+    }
+
+    /**
+     * Scans a character sequence for URIs. Then returns all unique host strings
+     * derived from those found URIs in a HashSet. Besides URIs, plain e-mail
+     * addresses are matched as well and contribute their host part.
+     *
+     * @param content a character sequence to be scanned for URIs; null is
+     *        treated like empty content
+     * @return a HashSet containing lower-cased host strings, never null
+     */
+    static protected HashSet scanContentForHosts(CharSequence content) {
+        HashSet set = new HashSet();
+        if (null == content) {
+            // nothing to scan; avoids provoking (and printing) an NPE below
+            return set;
+        }
+        try {
+            // look for URIs
+            Matcher mat = uriPattern.matcher(content);
+            while (mat.find()) {
+                addHost(set, cleanupUri(mat.group()));
+            }
+
+            // look for "schemeless" email addresses, too
+            mat = emailAddrPattern.matcher(content);
+            while (mat.find()) {
+                String found = mat.group();
+                debugOut("******** mailfound=\"" + found + "\"");
+                found = "mailto://" + found;
+                debugOut("*******6 mailfoundfound=\"" + found
+                        + "\" after cleanup 6");
+                addHost(set, found);
+            }
+        } catch (Exception ex) {
+            // best effort: report the problem and return what was collected
+            debugOut(ex.toString());
+            ex.printStackTrace();
+        }
+        return set;
+    }
+
+    /**
+     * Normalises a raw URI match so that hostFromUriStr() can parse it:
+     * unwraps "&lt;...&gt;", drops one trailing ']', ')', '>' or '#', rewrites
+     * "mailto:xx..." to "mailto://xx...", and prefixes a scheme to schemeless
+     * "www..." and "ftp." matches.
+     *
+     * @param found the text matched by the URI pattern
+     * @return the cleaned-up URI string
+     */
+    private static String cleanupUri(String found) {
+        Matcher cleanMat = uriCleanup.matcher(found);
+        if (cleanMat.find()) {
+            found = cleanMat.group(1);
+        }
+        cleanMat = uriCleanup2.matcher(found);
+        if (cleanMat.find()) {
+            found = cleanMat.replaceAll("");
+        }
+        cleanMat = uriCleanup3.matcher(found);
+        if (cleanMat.find()) {
+            found = "mailto://" + cleanMat.group(1) + cleanMat.group(2);
+        }
+        cleanMat = schemePattern.matcher(found);
+        if (!cleanMat.find()) {
+            if (wwwPrefixPattern.matcher(found).matches()) {
+                found = "http://" + found;
+            } else if (ftpPrefixPattern.matcher(found).matches()) {
+                found = "ftp://" + found;
+            }
+        }
+        return found;
+    }
+
+    /**
+     * Extracts the host of the given URI string and, if there is one, adds
+     * its lower-cased form to the set.
+     *
+     * @param set the set collecting host strings
+     * @param uriStr a string containing a URI
+     */
+    private static void addHost(HashSet set, String uriStr) {
+        String host = hostFromUriStr(uriStr);
+        if (null != host) {
+            // Host names are ASCII; lower-case them with a fixed locale so a
+            // default locale such as Turkish cannot turn "I" into a dotless i.
+            set.add(host.toLowerCase(java.util.Locale.ENGLISH));
+        }
+    }
+
+    /**
+     * Extracts and returns the host portion of URI string.
+     *
+     * This function uses java.net.URI.
+     *
+     * @param uriStr a string containing a URI
+     * @return the host portion of the supplied URI, null if no host string
+     *         could be found
+     */
+    static protected String hostFromUriStr(String uriStr) {
+        debugOut("hostFromUriStr(\"" + uriStr + "\")");
+        String host = null;
+        try {
+            URI uri = new URI(uriStr);
+            host = uri.getHost();
+        } catch (Exception ignored) {
+            // Deliberately ignored: the input is text that merely looked like
+            // a URI, so parse failures are expected and mean "no host" (null).
+        }
+        return host;
+    }
+
+    /**
+     * Extracts and returns the registrar domain portion of a host string. This
+     * function checks all known multi-part TLDs to make sure that registrar
+     * domain is complete. For example, if the supplied host string is
+     * "subdomain.example.co.uk", the TLD is "co.uk" and not "uk". Therefore,
+     * the correct registrar domain is not "co.uk", but "example.co.uk". If the
+     * domain string is an IP address, then the octets are returned in reverse
+     * order.
+     *
+     * @param host a string containing a host name
+     * @return the registrar domain portion of the supplied host string, or
+     *         null if host is null or none of the patterns match
+     */
+    static protected String domainFromHost(String host) {
+        debugOut("domainFromHost(\"" + host + "\")");
+        if (null == host) {
+            // no host, no domain; avoids provoking (and printing) an NPE below
+            return null;
+        }
+        String domain = null;
+        Matcher mat;
+        try {
+
+            // IP addrs
+            mat = ipCapPattern.matcher(host);
+            if (mat.find()) {
+                // reverse the octets now
+                domain = mat.group(5) + "." + mat.group(4) + "." + mat.group(3)
+                        + "." + mat.group(2);
+                debugOut("domain=\"" + domain + "\"");
+                return domain;
+            }
+
+            // 3-part TLDs
+            mat = tld3CapPattern.matcher(host);
+            if (mat.find()) {
+                String suffix = mat.group(2);
+                if (TLDLookup.isThreePartTLD(suffix)) {
+                    domain = mat.group(1);
+                    debugOut("domain=\"" + domain + ", tld=\"" + suffix + "\"");
+                    return domain;
+                }
+            }
+
+            // 2-part TLDs
+            mat = tld2CapPattern.matcher(host);
+            if (mat.find()) {
+                String suffix = mat.group(2);
+                if (TLDLookup.isTwoPartTLD(suffix)) {
+                    domain = mat.group(1);
+                    debugOut("domain=\"" + domain + ", tld=\"" + suffix + "\"");
+                    return domain;
+                }
+            }
+
+            // 1-part TLDs: accepted without consulting TLDLookup
+            mat = tldCapPattern.matcher(host);
+            if (mat.find()) {
+                String suffix = mat.group(2);
+                domain = mat.group(1);
+                debugOut("domain=\"" + domain + ", tld=\"" + suffix + "\"");
+                return domain;
+            }
+        } catch (Exception ex) {
+            // best effort: report the problem and fall through to the result
+            debugOut(ex.toString());
+            ex.printStackTrace();
+        }
+        return domain;
+    }
+
+    /**
+     * Debugging output; prints to System.out only while testing is true.
+     *
+     * @param msg the message to print
+     */
+    private static void debugOut(String msg) {
+        if (true == testing) {
+            System.out.println(msg);
+        }
+    }
+
+    /**
+     * A utility function that "escapes" special characters in a string by
+     * putting a backslash in front of every character that is not a digit,
+     * a letter or an underscore.
+     *
+     * @param str a string to be processed
+     * @return modified "escaped" string
+     */
+    private static String escape(String str) {
+        StringBuffer buffer = new StringBuffer();
+        for (int i = 0; i < str.length(); i++) {
+            char ch = str.charAt(i);
+            if (Character.isDigit(ch) || Character.isUpperCase(ch)
+                    || Character.isLowerCase(ch) || ch == '_') {
+                buffer.append(ch);
+            } else {
+                buffer.append("\\");
+                buffer.append(ch);
+            }
+        }
+        return buffer.toString();
+    }
+}

Added: james/server/trunk/src/test/org/apache/james/smtpserver/URIRBLHandlerTest.java
URL: http://svn.apache.org/viewvc/james/server/trunk/src/test/org/apache/james/smtpserver/URIRBLHandlerTest.java?view=auto&rev=441631
==============================================================================
--- james/server/trunk/src/test/org/apache/james/smtpserver/URIRBLHandlerTest.java (added)
+++ james/server/trunk/src/test/org/apache/james/smtpserver/URIRBLHandlerTest.java Fri Sep  8 13:08:33 2006
@@ -0,0 +1,209 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+
+
+package org.apache.james.smtpserver;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import javax.mail.MessagingException;
+import javax.mail.internet.MimeMessage;
+
+import junit.framework.TestCase;
+
+import org.apache.avalon.framework.container.ContainerUtil;
+import org.apache.james.services.DNSServer;
+import org.apache.james.smtpserver.core.filter.fastfail.URIRBLHandler;
+import org.apache.james.test.mock.avalon.MockLogger;
+import org.apache.james.test.mock.javaxmail.MockMimeMessage;
+import org.apache.james.test.mock.mailet.MockMail;
+import org.apache.mailet.Mail;
+
+/**
+ * Tests for URIRBLHandler that run the handler against a mocked SMTP session
+ * and a mocked DNS server.
+ */
+public class URIRBLHandlerTest extends TestCase {
+    private static final String BAD_DOMAIN1 = "bad.domain.multi.surbl.org";
+    private static final String BAD_DOMAIN2 = "bad2.domain.multi.surbl.org";
+    private static final String GOOD_DOMAIN = "good.domain.multi.surbl.org";
+    private SMTPSession mockedSMTPSession;
+
+    /** Last string passed to the mocked session's writeResponse(), or null */
+    private String response = null;
+
+    public void setUp() {
+        // reset response
+        response = null;
+    }
+
+    /**
+     * Builds a mocked SMTP session that serves the given mail and records
+     * whatever the handler writes back in the response field.
+     *
+     * @param mail the mail returned by the session's getMail()
+     * @return the mocked session
+     */
+    private SMTPSession setupMockedSMTPSession(final Mail mail) {
+        mockedSMTPSession = new AbstractSMTPSession() {
+
+            private HashMap state = new HashMap();
+
+            private String ipAddress = "192.168.0.1";
+
+            private String host = "localhost";
+
+            private boolean relayingAllowed;
+
+            public void abortMessage() {
+            }
+
+            public Mail getMail() {
+                return mail;
+            }
+
+            public String getRemoteHost() {
+                return host;
+            }
+
+            public String getRemoteIPAddress() {
+                return ipAddress;
+            }
+
+            public Map getState() {
+                state.put(SMTPSession.SENDER, "sender@james.apache.org");
+                return state;
+            }
+
+            public boolean isRelayingAllowed() {
+                return relayingAllowed;
+            }
+
+            public void setRelayingAllowed(boolean relayingAllowed) {
+                this.relayingAllowed = relayingAllowed;
+            }
+
+            public void writeResponse(String respString) {
+                // capture the response so the tests can inspect it
+                response = respString;
+            }
+        };
+
+        return mockedSMTPSession;
+
+    }
+
+    /**
+     * @return the last response written to the mocked session, or null if
+     *         nothing was written since setUp()
+     */
+    private String getResponse() {
+        return response;
+    }
+
+    /**
+     * Wraps the given message in a MockMail.
+     *
+     * @param message the message to attach to the mail
+     * @return the mocked mail
+     */
+    private Mail setupMockedMail(MimeMessage message) {
+        MockMail mail = new MockMail();
+        mail.setMessage(message);
+        return mail;
+    }
+
+    /**
+     * Creates a MimeMessage whose text content is the given string.
+     *
+     * @param text the message text
+     * @return the saved message
+     * @throws MessagingException if the message cannot be built
+     */
+    public MimeMessage setupMockedMimeMessage(String text)
+            throws MessagingException {
+        MimeMessage message = new MimeMessage(new MockMimeMessage());
+        message.setText(text);
+        message.saveChanges();
+
+        return message;
+    }
+
+    /**
+     * Setup the mocked dnsserver. Only findTXTRecords() and getByName() are
+     * implemented; the remaining lookups throw UnsupportedOperationException.
+     *
+     * @return the mocked DNS server
+     */
+    private DNSServer setupMockedDnsServer() {
+        DNSServer mockedDnsServer = new DNSServer() {
+
+            public Collection findMXRecords(String hostname) {
+                throw new UnsupportedOperationException("Unimplemented in mock");
+            }
+
+            public Collection findTXTRecords(String hostname) {
+                List res = new ArrayList();
+                if (hostname == null) {
+                    return res;
+                }
+                if (BAD_DOMAIN1.equals(hostname)) {
+                    res.add("Blocked - see http://www.surbl.org");
+                }
+                return res;
+            }
+
+            public Iterator getSMTPHostAddresses(String domainName) {
+                throw new UnsupportedOperationException("Unimplemented in mock");
+            }
+
+            public InetAddress[] getAllByName(String host)
+                    throws UnknownHostException {
+                throw new UnsupportedOperationException("Unimplemented in mock");
+            }
+
+            public InetAddress getByName(String host)
+                    throws UnknownHostException {
+                if (BAD_DOMAIN1.equals(host)) {
+                    return InetAddress.getByName("127.0.0.1");
+                } else if (BAD_DOMAIN2.equals(host)) {
+                    return InetAddress.getByName("127.0.0.1");
+                } else if (GOOD_DOMAIN.equals(host)) {
+                    // presumably an unresolvable name, so that the lookup
+                    // fails with UnknownHostException - TODO confirm
+                    return InetAddress.getByName("fesdgaeg.deger");
+                }
+                // any other name is passed to the real resolver
+                return InetAddress.getByName(host);
+            }
+        };
+
+        return mockedDnsServer;
+    }
+
+    /**
+     * A message that links to GOOD_DOMAIN must not cause any response to be
+     * written to the session.
+     */
+    public void testNotBlocked() throws IOException, MessagingException {
+
+        ArrayList servers = new ArrayList();
+        servers.add("multi.surbl.org");
+
+        SMTPSession session = setupMockedSMTPSession(setupMockedMail(setupMockedMimeMessage("http://" + GOOD_DOMAIN + "/")));
+
+        URIRBLHandler handler = new URIRBLHandler();
+
+        ContainerUtil.enableLogging(handler, new MockLogger());
+        handler.setDnsServer(setupMockedDnsServer());
+        handler.setUriRblServer(servers);
+        handler.onMessage(session);
+
+        assertNull("Email was not rejected", getResponse());
+    }
+
+    /**
+     * Runs the handler on a message that links to BAD_DOMAIN1.
+     */
+    public void testBlocked() throws IOException, MessagingException {
+
+        ArrayList servers = new ArrayList();
+        servers.add("multi.surbl.org");
+
+        SMTPSession session = setupMockedSMTPSession(setupMockedMail(setupMockedMimeMessage("http://" + BAD_DOMAIN1 + "/")));
+
+        URIRBLHandler handler = new URIRBLHandler();
+
+        ContainerUtil.enableLogging(handler, new MockLogger());
+        handler.setDnsServer(setupMockedDnsServer());
+        handler.setUriRblServer(servers);
+        handler.onMessage(session);
+
+        // NOTE(review): this asserts that NO response was written although the
+        // test is named testBlocked and the message reads "Email was
+        // rejected"; assertNotNull looks like the intended check. Confirm
+        // which name the handler actually looks up for this URL (the mock only
+        // answers for the exact BAD_DOMAIN1 string) before changing it.
+        assertNull("Email was rejected", getResponse());
+    }
+}



---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org