You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by no...@apache.org on 2006/09/08 22:08:34 UTC
svn commit: r441631 - in /james/server/trunk/src:
java/org/apache/james/smtpserver/core/filter/fastfail/
java/org/apache/james/util/urirbl/ test/org/apache/james/smtpserver/
Author: norman
Date: Fri Sep 8 13:08:33 2006
New Revision: 441631
URL: http://svn.apache.org/viewvc?view=rev&rev=441631
Log:
Add URIRBLHandler support based on a contribution. Thx to Mike Bryant. See JAMES-610
Added:
james/server/trunk/src/java/org/apache/james/smtpserver/core/filter/fastfail/URIRBLHandler.java
james/server/trunk/src/java/org/apache/james/util/urirbl/
james/server/trunk/src/java/org/apache/james/util/urirbl/TLDLookup.java
james/server/trunk/src/java/org/apache/james/util/urirbl/URIScanner.java
james/server/trunk/src/test/org/apache/james/smtpserver/URIRBLHandlerTest.java
Added: james/server/trunk/src/java/org/apache/james/smtpserver/core/filter/fastfail/URIRBLHandler.java
URL: http://svn.apache.org/viewvc/james/server/trunk/src/java/org/apache/james/smtpserver/core/filter/fastfail/URIRBLHandler.java?view=auto&rev=441631
==============================================================================
--- james/server/trunk/src/java/org/apache/james/smtpserver/core/filter/fastfail/URIRBLHandler.java (added)
+++ james/server/trunk/src/java/org/apache/james/smtpserver/core/filter/fastfail/URIRBLHandler.java Fri Sep 8 13:08:33 2006
@@ -0,0 +1,256 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+
+
+
+package org.apache.james.smtpserver.core.filter.fastfail;
+
+import java.io.IOException;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+
+import javax.mail.MessagingException;
+import javax.mail.internet.MimeBodyPart;
+import javax.mail.internet.MimeMessage;
+import javax.mail.internet.MimeMultipart;
+import javax.mail.internet.MimePart;
+
+import org.apache.avalon.framework.configuration.Configurable;
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.logger.AbstractLogEnabled;
+import org.apache.avalon.framework.service.ServiceException;
+import org.apache.avalon.framework.service.ServiceManager;
+import org.apache.avalon.framework.service.Serviceable;
+import org.apache.james.services.DNSServer;
+import org.apache.james.smtpserver.MessageHandler;
+import org.apache.james.smtpserver.SMTPSession;
+import org.apache.james.util.mail.dsn.DSNStatus;
+import org.apache.james.util.urirbl.URIScanner;
+
+/**
+ * Extract domains from message and check against URIRBLServer
+ */
+public class URIRBLHandler extends AbstractLogEnabled implements MessageHandler,
+ Serviceable, Configurable {
+
+ // DNS service used for the blocklist lookups; assigned through setDnsServer().
+ private DNSServer dnsServer;
+
+ // URI blocklist servers queried in onMessage(); assigned through setUriRblServer().
+ private Collection uriRbl;
+
+ // When true, onMessage() looks up a TXT record and uses it as the rejection text.
+ private boolean getDetail = false;
+
+ // When false, onMessage() does not scan mail of sessions that are allowed to relay.
+ private boolean checkAuthNetworks = false;
+
+ /**
+ * @see org.apache.avalon.framework.service.Serviceable#service(ServiceManager)
+ */
+ public void service(ServiceManager serviceMan) throws ServiceException {
+ setDnsServer((DNSServer) serviceMan.lookup(DNSServer.ROLE));
+ }
+
+ /**
+  * Reads the handler configuration.
+  * <p>
+  * Recognised children of the handler element:
+  * <ul>
+  * <li><code>uriRblServers</code> (required) holding one or more
+  * <code>server</code> children</li>
+  * <li><code>getDetail</code> (optional boolean)</li>
+  * <li><code>checkAuthNetworks</code> (optional boolean, false if the value
+  * is missing)</li>
+  * </ul>
+  *
+  * @param arg0 the handler configuration
+  * @throws ConfigurationException if no server is configured or a value
+  *         cannot be read
+  * @see org.apache.avalon.framework.configuration.Configurable#configure(Configuration)
+  */
+ public void configure(Configuration arg0) throws ConfigurationException {
+     Configuration serverConfiguration = arg0.getChild("uriRblServers", false);
+     Configuration[] children = null;
+     if (serverConfiguration != null) {
+         children = serverConfiguration.getChildren("server");
+     }
+
+     // A missing element, a null child array and an empty child list are all
+     // rejected here: without at least one server onMessage() has nothing to
+     // query and would fail on the unset collection.
+     if (children == null || children.length == 0) {
+         throw new ConfigurationException("Please provide at least one server");
+     }
+
+     ArrayList serverCollection = new ArrayList(children.length);
+     for (int i = 0; i < children.length; i++) {
+         String rblServerName = children[i].getValue();
+         serverCollection.add(rblServerName);
+         if (getLogger().isInfoEnabled()) {
+             getLogger().info("Adding uriRBL server: " + rblServerName);
+         }
+     }
+     setUriRblServer(serverCollection);
+
+     Configuration configuration = arg0.getChild("getDetail", false);
+     if (configuration != null) {
+         setGetDetail(configuration.getValueAsBoolean());
+     }
+
+     Configuration configRelay = arg0.getChild("checkAuthNetworks", false);
+     if (configRelay != null) {
+         setCheckAuthNetworks(configRelay.getValueAsBoolean(false));
+     }
+ }
+
+ /**
+ * Set the URI RBL servers that onMessage() queries.
+ * <p>
+ * The collection is stored as passed in (no copy is made). onMessage() uses
+ * the toString() value of each element as the server name.
+ *
+ * @param uriRbl The Collection holding the servers
+ */
+ public void setUriRblServer(Collection uriRbl) {
+ this.uriRbl = uriRbl;
+ }
+
+ /**
+ * Set to true if sessions that are allowed to relay should be included in
+ * the URI RBL check. When false, onMessage() returns without scanning for
+ * such sessions.
+ *
+ * @param checkAuthNetworks
+ * Set to true to enable
+ */
+ public void setCheckAuthNetworks(boolean checkAuthNetworks) {
+ this.checkAuthNetworks = checkAuthNetworks;
+ }
+
+ /**
+ * Set the DNSServer used by onMessage() for the host and TXT record lookups.
+ *
+ * @param dnsServer
+ * The DNSServer
+ */
+ public void setDnsServer(DNSServer dnsServer) {
+ this.dnsServer = dnsServer;
+ }
+
+ /**
+ * Set whether a TXT record should be looked up for a blocked domain and
+ * used as the detail text of the rejection.
+ *
+ * @param getDetail Set to true to enable
+ */
+ public void setGetDetail(boolean getDetail) {
+ this.getDetail = getDetail;
+ }
+
+ /**
+ * @see org.apache.james.smtpserver.MessageHandler#onMessage(SMTPSession)
+ */
+ public void onMessage(SMTPSession session) {
+ MimeMessage message;
+
+ // Not scan the message if relaying allowed
+ if (session.isRelayingAllowed() && !checkAuthNetworks) {
+ return;
+ }
+
+ try {
+ message = session.getMail().getMessage();
+
+ HashSet domains = scanMailForDomains(message);
+
+ Iterator fDomains = domains.iterator();
+ Iterator uRbl = uriRbl.iterator();
+
+ while (fDomains.hasNext()) {
+ String target = fDomains.next().toString();
+
+ while (uRbl.hasNext()) {
+ try {
+ String responseString = null;
+ String detail = null;
+ String uRblServer = uRbl.next().toString();
+ dnsServer.getByName(target + "." + uRblServer);
+
+ if (getLogger().isInfoEnabled()) {
+ getLogger().info("Message restricted by " + uRblServer + " to SMTP AUTH/postmaster/abuse.");
+ }
+
+ // we should try to retrieve details
+ if (getDetail) {
+ Collection txt = dnsServer.findTXTRecords(target + "." + uRbl.next());
+
+ // Check if we found a txt record
+ if (!txt.isEmpty()) {
+ // Set the detail
+ detail = txt.iterator().next().toString();
+
+ }
+ }
+
+ if (detail != null) {
+ responseString = "530 "
+ + DSNStatus.getStatus(DSNStatus.PERMANENT, DSNStatus.SECURITY_AUTH) + " "
+ + detail;
+ } else {
+ responseString = "530 "
+ + DSNStatus.getStatus(DSNStatus.PERMANENT, DSNStatus.SECURITY_AUTH)
+ + " Rejected: unauthenticated e-mail from "
+ + session.getRemoteIPAddress()
+ + " is restricted. Contact the postmaster for details.";
+ }
+
+ session.writeResponse(responseString);
+ session.abortMessage();
+ session.setStopHandlerProcessing(true);
+
+ } catch (UnknownHostException uhe) {
+ // domain not found. keep processing
+ }
+ }
+ }
+ } catch (MessagingException e) {
+ getLogger().error(e.getMessage());
+ } catch (IOException e) {
+ getLogger().error(e.getMessage());
+ }
+ }
+
+ /**
+  * Recursively scans all MimeParts of an email for domain strings.
+  * <p>
+  * Parts of type text/plain and text/html are handed to URIScanner. Parts of
+  * type multipart/* are walked recursively and the domains found in every
+  * body part are merged into the result. Parts of any other type are
+  * ignored.
+  *
+  * @param part MimePart to scan
+  * @return HashSet holding the domain strings found in the part and in all
+  *         of its sub parts; empty if none were found
+  * @throws MessagingException if the part cannot be accessed
+  * @throws IOException if the content of the part cannot be read
+  */
+ private HashSet scanMailForDomains(MimePart part)
+         throws MessagingException, IOException {
+     HashSet domains = new HashSet();
+     boolean debug = getLogger().isDebugEnabled();
+
+     if (debug) {
+         getLogger().debug("mime type is: \"" + part.getContentType() + "\"");
+     }
+
+     if (part.isMimeType("text/plain") || part.isMimeType("text/html")) {
+         // Fetch the content once; it is needed for logging and scanning
+         String content = part.getContent().toString();
+         if (debug) {
+             getLogger().debug("scanning: \"" + content + "\"");
+         }
+         URIScanner.scanContentForDomains(domains, content);
+     } else if (part.isMimeType("multipart/*")) {
+         MimeMultipart multipart = (MimeMultipart) part.getContent();
+         int count = multipart.getCount();
+         if (debug) {
+             getLogger().debug("multipart count is: " + count);
+         }
+
+         for (int index = 0; index < count; index++) {
+             if (debug) {
+                 getLogger().debug("recursing index: " + index);
+             }
+             MimeBodyPart mimeBodyPart = (MimeBodyPart) multipart.getBodyPart(index);
+             // Merge instead of assign, otherwise only the domains of the
+             // last body part would survive.
+             domains.addAll(scanMailForDomains(mimeBodyPart));
+         }
+     }
+     return domains;
+ }
+
+}
Added: james/server/trunk/src/java/org/apache/james/util/urirbl/TLDLookup.java
URL: http://svn.apache.org/viewvc/james/server/trunk/src/java/org/apache/james/util/urirbl/TLDLookup.java?view=auto&rev=441631
==============================================================================
--- james/server/trunk/src/java/org/apache/james/util/urirbl/TLDLookup.java (added)
+++ james/server/trunk/src/java/org/apache/james/util/urirbl/TLDLookup.java Fri Sep 8 13:08:33 2006
@@ -0,0 +1,1091 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+
+
+
+package org.apache.james.util.urirbl;
+
+import java.util.HashSet;
+
+/**
+ * A utility class that caches sets of multi-part top level domains (TLDs) for
+ * quick lookup.
+ */
+public class TLDLookup {
+
+ /** Controls testing/debug output. Declared ahead of the cached fields
+  * below because their initializers call debugOut() while the class is
+  * being initialized; static initializers run in textual order, so a flag
+  * declared after them would still hold its default value (false) at
+  * that point. */
+ static private boolean testing = false;
+
+ /** Simple regular expression to match strings in the cache. Note: if the
+  * collection of known multi-part TLDs changes to contain characters other
+  * than these, this string must be modified. */
+ static private final String tld = "[A-Za-z0-9\\-]*";
+
+ /** Simple regular expression that matches a two-part TLD */
+ static private final String tld2 = tld+"\\."+tld;
+
+ /** Simple regular expression that matches a three-part TLD */
+ static private final String tld3 = tld+"\\."+tld+"\\."+tld;
+
+ /** Array of all known multi-level TLDs */
+ static private final String[] multiPartTLDs = initMultiPartTLDs();
+
+ /** A set of all known two-part TLDs */
+ static private final HashSet twoPartTLDs = initTwoPartTLDs();
+
+ /** A set of all known three-part TLDs */
+ static private final HashSet threePartTLDs = initThreePartTLDs();
+
+ /**
+  * Looks up a two-part domain string (xxx.xxx) in the cache of known
+  * two-part TLDs.
+  *
+  * @param domain a String representing a two-part domain
+  * @return true if the domain string is found in the cache, false otherwise
+  */
+ static public boolean isTwoPartTLD(String domain) {
+     final boolean known = twoPartTLDs.contains(domain);
+     return known;
+ }
+
+ /**
+  * Looks up a three-part domain string (xxx.xxx.xxx) in the cache of known
+  * three-part TLDs.
+  *
+  * @param domain a String representing a three-part domain
+  * @return true if the domain string is found in the cache, false otherwise
+  */
+ static public boolean isThreePartTLD(String domain) {
+     final boolean known = threePartTLDs.contains(domain);
+     return known;
+ }
+
+ /**
+  * Initialize two-part top-level domain cache by selecting every entry of
+  * the multi-part TLD array that consists of exactly two labels.
+  *
+  * @return a HashSet containing all known two-part TLDs
+  */
+ static private HashSet initTwoPartTLDs() {
+     // Compile the expression once; String.matches() would compile it again
+     // for every entry of the array.
+     java.util.regex.Pattern twoPart = java.util.regex.Pattern.compile("^" + tld2 + "$");
+
+     HashSet set = new HashSet(900);
+     for (int i = 0; i < multiPartTLDs.length; i++) {
+         String candidate = multiPartTLDs[i];
+         if (twoPart.matcher(candidate).matches()) {
+             set.add(candidate);
+         }
+     }
+     debugOut("initTwoPartTLDs size=" + set.size());
+     return set;
+ }
+
+ /**
+  * Initialize three-part top-level domain cache by selecting every entry of
+  * the multi-part TLD array that consists of exactly three labels.
+  *
+  * @return a HashSet containing all known three-part TLDs
+  */
+ static private HashSet initThreePartTLDs() {
+     // Compile the expression once; String.matches() would compile it again
+     // for every entry of the array.
+     java.util.regex.Pattern threePart = java.util.regex.Pattern.compile("^" + tld3 + "$");
+
+     HashSet set = new HashSet();
+     for (int i = 0; i < multiPartTLDs.length; i++) {
+         String candidate = multiPartTLDs[i];
+         if (threePart.matcher(candidate).matches()) {
+             debugOut("adding \"" + candidate + "\"");
+             set.add(candidate);
+         }
+     }
+     debugOut("initThreePartTLDs size=" + set.size());
+     return set;
+ }
+
+ /**
+ * Initialize an array of Strings containing all known multi-part TLDs
+ *
+ * @return an array of all known multi-part TLDs
+ */
+ static private String[] initMultiPartTLDs() {
+ String[] tmp = new String[] {
+ "com.ac",
+ "edu.ac",
+ "gov.ac",
+ "edu.ai",
+ "gov.ai",
+ "com.ar",
+ "net.ar",
+ "org.ar",
+ "gov.ar",
+ "mil.ar",
+ "edu.ar",
+ "int.ar",
+ "co.at",
+ "ac.at",
+ "or.at",
+ "gv.at",
+ "priv.at",
+ "com.au",
+ "gov.au",
+ "org.au",
+ "edu.au",
+ "id.au",
+ "oz.au",
+ "info.au",
+ "net.au",
+ "asn.au",
+ "csiro.au",
+ "telememo.au",
+ "conf.au",
+ "otc.au",
+ "com.az",
+ "net.az",
+ "org.az",
+ "com.bb",
+ "net.bb",
+ "org.bb",
+ "ac.be",
+ "belgie.be",
+ "dns.be",
+ "fgov.be",
+ "com.bh",
+ "gov.bh",
+ "net.bh",
+ "edu.bh",
+ "org.bh",
+ "com.bm",
+ "edu.bm",
+ "gov.bm",
+ "org.bm",
+ "net.bm",
+ "adm.br",
+ "adv.br",
+ "agr.br",
+ "am.br",
+ "arq.br",
+ "art.br",
+ "ato.br",
+ "bio.br",
+ "bmd.br",
+ "cim.br",
+ "cng.br",
+ "cnt.br",
+ "com.br",
+ "coop.br",
+ "ecn.br",
+ "edu.br",
+ "eng.br",
+ "esp.br",
+ "etc.br",
+ "eti.br",
+ "far.br",
+ "fm.br",
+ "fnd.br",
+ "fot.br",
+ "fst.br",
+ "g12.br",
+ "ggf.br",
+ "gov.br",
+ "imb.br",
+ "ind.br",
+ "inf.br",
+ "jor.br",
+ "lel.br",
+ "mat.br",
+ "med.br",
+ "mil.br",
+ "mus.br",
+ "net.br",
+ "nom.br",
+ "not.br",
+ "ntr.br",
+ "odo.br",
+ "org.br",
+ "ppg.br",
+ "pro.br",
+ "psc.br",
+ "psi.br",
+ "qsl.br",
+ "rec.br",
+ "slg.br",
+ "srv.br",
+ "tmp.br",
+ "trd.br",
+ "tur.br",
+ "tv.br",
+ "vet.br",
+ "zlg.br",
+ "com.bs",
+ "net.bs",
+ "org.bs",
+ "ab.ca",
+ "bc.ca",
+ "mb.ca",
+ "nb.ca",
+ "nf.ca",
+ "nl.ca",
+ "ns.ca",
+ "nt.ca",
+ "nu.ca",
+ "on.ca",
+ "pe.ca",
+ "qc.ca",
+ "sk.ca",
+ "yk.ca",
+ "co.ck",
+ "net.ck",
+ "org.ck",
+ "edu.ck",
+ "gov.ck",
+ "com.cn",
+ "edu.cn",
+ "gov.cn",
+ "net.cn",
+ "org.cn",
+ "ac.cn",
+ "ah.cn",
+ "bj.cn",
+ "cq.cn",
+ "gd.cn",
+ "gs.cn",
+ "gx.cn",
+ "gz.cn",
+ "hb.cn",
+ "he.cn",
+ "hi.cn",
+ "hk.cn",
+ "hl.cn",
+ "hn.cn",
+ "jl.cn",
+ "js.cn",
+ "ln.cn",
+ "mo.cn",
+ "nm.cn",
+ "nx.cn",
+ "qh.cn",
+ "sc.cn",
+ "sn.cn",
+ "sh.cn",
+ "sx.cn",
+ "tj.cn",
+ "tw.cn",
+ "xj.cn",
+ "xz.cn",
+ "yn.cn",
+ "zj.cn",
+ "arts.co",
+ "com.co",
+ "edu.co",
+ "firm.co",
+ "gov.co",
+ "info.co",
+ "int.co",
+ "nom.co",
+ "mil.co",
+ "org.co",
+ "rec.co",
+ "store.co",
+ "web.co",
+ "ac.cr",
+ "co.cr",
+ "ed.cr",
+ "fi.cr",
+ "go.cr",
+ "or.cr",
+ "sa.cr",
+ "com.cu",
+ "net.cu",
+ "org.cu",
+ "ac.cy",
+ "com.cy",
+ "gov.cy",
+ "net.cy",
+ "org.cy",
+ "co.dk",
+ "art.do",
+ "com.do",
+ "edu.do",
+ "gov.do",
+ "org.do",
+ "mil.do",
+ "net.do",
+ "web.do",
+ "com.dz",
+ "org.dz",
+ "net.dz",
+ "gov.dz",
+ "edu.dz",
+ "ass.dz",
+ "pol.dz",
+ "art.dz",
+ "com.ec",
+ "k12.ec",
+ "edu.ec",
+ "fin.ec",
+ "med.ec",
+ "gov.ec",
+ "mil.ec",
+ "org.ec",
+ "net.ec",
+ "com.eg",
+ "edu.eg",
+ "eun.eg",
+ "gov.eg",
+ "net.eg",
+ "org.eg",
+ "sci.eg",
+ "com.er",
+ "net.er",
+ "org.er",
+ "edu.er",
+ "mil.er",
+ "gov.er",
+ "ind.er",
+ "com.et",
+ "gov.et",
+ "org.et",
+ "edu.et",
+ "net.et",
+ "biz.et",
+ "name.et",
+ "info.et",
+ "ac.fj",
+ "com.fj",
+ "gov.fj",
+ "id.fj",
+ "org.fj",
+ "school.fj",
+ "com.fk",
+ "ac.fk",
+ "gov.fk",
+ "net.fk",
+ "nom.fk",
+ "org.fk",
+ "asso.fr",
+ "nom.fr",
+ "barreau.fr",
+ "com.fr",
+ "prd.fr",
+ "presse.fr",
+ "tm.fr",
+ "aeroport.fr",
+ "assedic.fr",
+ "avocat.fr",
+ "avoues.fr",
+ "cci.fr",
+ "chambagri.fr",
+ "chirurgiens-dentistes.fr",
+ "experts-comptables.fr",
+ "geometre-expert.fr",
+ "gouv.fr",
+ "greta.fr",
+ "huissier-justice.fr",
+ "medecin.fr",
+ "notaires.fr",
+ "pharmacien.fr",
+ "port.fr",
+ "veterinaire.fr",
+ "com.ge",
+ "edu.ge",
+ "gov.ge",
+ "mil.ge",
+ "net.ge",
+ "org.ge",
+ "pvt.ge",
+ "co.gg",
+ "org.gg",
+ "sch.gg",
+ "ac.gg",
+ "gov.gg",
+ "ltd.gg",
+ "ind.gg",
+ "net.gg",
+ "alderney.gg",
+ "guernsey.gg",
+ "sark.gg",
+ "com.gu",
+ "edu.gu",
+ "net.gu",
+ "org.gu",
+ "gov.gu",
+ "mil.gu",
+ "com.hk",
+ "net.hk",
+ "org.hk",
+ "idv.hk",
+ "gov.hk",
+ "edu.hk",
+ "co.hu",
+ "2000.hu",
+ "erotika.hu",
+ "jogasz.hu",
+ "sex.hu",
+ "video.hu",
+ "info.hu",
+ "agrar.hu",
+ "film.hu",
+ "konyvelo.hu",
+ "shop.hu",
+ "org.hu",
+ "bolt.hu",
+ "forum.hu",
+ "lakas.hu",
+ "suli.hu",
+ "priv.hu",
+ "casino.hu",
+ "games.hu",
+ "media.hu",
+ "szex.hu",
+ "sport.hu",
+ "city.hu",
+ "hotel.hu",
+ "news.hu",
+ "tozsde.hu",
+ "tm.hu",
+ "erotica.hu",
+ "ingatlan.hu",
+ "reklam.hu",
+ "utazas.hu",
+ "ac.id",
+ "co.id",
+ "go.id",
+ "mil.id",
+ "net.id",
+ "or.id",
+ "co.il",
+ "net.il",
+ "org.il",
+ "ac.il",
+ "gov.il",
+ "k12.il",
+ "muni.il",
+ "idf.il",
+ "co.im",
+ "net.im",
+ "org.im",
+ "ac.im",
+ "lkd.co.im",
+ "gov.im",
+ "nic.im",
+ "plc.co.im",
+ "co.in",
+ "net.in",
+ "ac.in",
+ "ernet.in",
+ "gov.in",
+ "nic.in",
+ "res.in",
+ "gen.in",
+ "firm.in",
+ "mil.in",
+ "org.in",
+ "ind.in",
+ "ac.je",
+ "co.je",
+ "net.je",
+ "org.je",
+ "gov.je",
+ "ind.je",
+ "jersey.je",
+ "ltd.je",
+ "sch.je",
+ "com.jo",
+ "org.jo",
+ "net.jo",
+ "gov.jo",
+ "edu.jo",
+ "mil.jo",
+ "ad.jp",
+ "ac.jp",
+ "co.jp",
+ "go.jp",
+ "or.jp",
+ "ne.jp",
+ "gr.jp",
+ "ed.jp",
+ "lg.jp",
+ "net.jp",
+ "org.jp",
+ "gov.jp",
+ "hokkaido.jp",
+ "aomori.jp",
+ "iwate.jp",
+ "miyagi.jp",
+ "akita.jp",
+ "yamagata.jp",
+ "fukushima.jp",
+ "ibaraki.jp",
+ "tochigi.jp",
+ "gunma.jp",
+ "saitama.jp",
+ "chiba.jp",
+ "tokyo.jp",
+ "kanagawa.jp",
+ "niigata.jp",
+ "toyama.jp",
+ "ishikawa.jp",
+ "fukui.jp",
+ "yamanashi.jp",
+ "nagano.jp",
+ "gifu.jp",
+ "shizuoka.jp",
+ "aichi.jp",
+ "mie.jp",
+ "shiga.jp",
+ "kyoto.jp",
+ "osaka.jp",
+ "hyogo.jp",
+ "nara.jp",
+ "wakayama.jp",
+ "tottori.jp",
+ "shimane.jp",
+ "okayama.jp",
+ "hiroshima.jp",
+ "yamaguchi.jp",
+ "tokushima.jp",
+ "kagawa.jp",
+ "ehime.jp",
+ "kochi.jp",
+ "fukuoka.jp",
+ "saga.jp",
+ "nagasaki.jp",
+ "kumamoto.jp",
+ "oita.jp",
+ "miyazaki.jp",
+ "kagoshima.jp",
+ "okinawa.jp",
+ "sapporo.jp",
+ "sendai.jp",
+ "yokohama.jp",
+ "kawasaki.jp",
+ "nagoya.jp",
+ "kobe.jp",
+ "kitakyushu.jp",
+ "utsunomiya.jp",
+ "kanazawa.jp",
+ "takamatsu.jp",
+ "matsuyama.jp",
+ "com.kh",
+ "net.kh",
+ "org.kh",
+ "per.kh",
+ "edu.kh",
+ "gov.kh",
+ "mil.kh",
+ "ac.kr",
+ "co.kr",
+ "go.kr",
+ "ne.kr",
+ "or.kr",
+ "pe.kr",
+ "re.kr",
+ "seoul.kr",
+ "kyonggi.kr",
+ "com.kw",
+ "net.kw",
+ "org.kw",
+ "edu.kw",
+ "gov.kw",
+ "com.la",
+ "net.la",
+ "org.la",
+ "com.lb",
+ "org.lb",
+ "net.lb",
+ "edu.lb",
+ "gov.lb",
+ "mil.lb",
+ "com.lc",
+ "edu.lc",
+ "gov.lc",
+ "net.lc",
+ "org.lc",
+ "com.lv",
+ "net.lv",
+ "org.lv",
+ "edu.lv",
+ "gov.lv",
+ "mil.lv",
+ "id.lv",
+ "asn.lv",
+ "conf.lv",
+ "com.ly",
+ "net.ly",
+ "org.ly",
+ "co.ma",
+ "net.ma",
+ "org.ma",
+ "press.ma",
+ "ac.ma",
+ "com.mk",
+ "com.mm",
+ "net.mm",
+ "org.mm",
+ "edu.mm",
+ "gov.mm",
+ "com.mo",
+ "net.mo",
+ "org.mo",
+ "edu.mo",
+ "gov.mo",
+ "com.mt",
+ "net.mt",
+ "org.mt",
+ "edu.mt",
+ "tm.mt",
+ "uu.mt",
+ "com.mx",
+ "net.mx",
+ "org.mx",
+ "com.my",
+ "org.my",
+ "gov.my",
+ "edu.my",
+ "net.my",
+ "com.na",
+ "org.na",
+ "net.na",
+ "alt.na",
+ "edu.na",
+ "cul.na",
+ "unam.na",
+ "telecom.na",
+ "com.nc",
+ "net.nc",
+ "org.nc",
+ "ac.ng",
+ "edu.ng",
+ "sch.ng",
+ "com.ng",
+ "gov.ng",
+ "org.ng",
+ "net.ng",
+ "gob.ni",
+ "com.ni",
+ "net.ni",
+ "edu.ni",
+ "nom.ni",
+ "org.ni",
+ "com.np",
+ "net.np",
+ "org.np",
+ "gov.np",
+ "edu.np",
+ "ac.nz",
+ "co.nz",
+ "cri.nz",
+ "gen.nz",
+ "geek.nz",
+ "govt.nz",
+ "iwi.nz",
+ "maori.nz",
+ "mil.nz",
+ "net.nz",
+ "org.nz",
+ "school.nz",
+ "com.om",
+ "co.om",
+ "edu.om",
+ "ac.om",
+ "gov.om",
+ "net.om",
+ "org.om",
+ "mod.om",
+ "museum.om",
+ "biz.om",
+ "pro.om",
+ "med.om",
+ "com.pa",
+ "net.pa",
+ "org.pa",
+ "edu.pa",
+ "ac.pa",
+ "gob.pa",
+ "sld.pa",
+ "edu.pe",
+ "gob.pe",
+ "nom.pe",
+ "mil.pe",
+ "org.pe",
+ "com.pe",
+ "net.pe",
+ "com.pg",
+ "net.pg",
+ "ac.pg",
+ "com.ph",
+ "net.ph",
+ "org.ph",
+ "mil.ph",
+ "ngo.ph",
+ "aid.pl",
+ "agro.pl",
+ "atm.pl",
+ "auto.pl",
+ "biz.pl",
+ "com.pl",
+ "edu.pl",
+ "gmina.pl",
+ "gsm.pl",
+ "info.pl",
+ "mail.pl",
+ "miasta.pl",
+ "media.pl",
+ "mil.pl",
+ "net.pl",
+ "nieruchomosci.pl",
+ "nom.pl",
+ "org.pl",
+ "pc.pl",
+ "powiat.pl",
+ "priv.pl",
+ "realestate.pl",
+ "rel.pl",
+ "sex.pl",
+ "shop.pl",
+ "sklep.pl",
+ "sos.pl",
+ "szkola.pl",
+ "targi.pl",
+ "tm.pl",
+ "tourism.pl",
+ "travel.pl",
+ "turystyka.pl",
+ "com.pk",
+ "net.pk",
+ "edu.pk",
+ "org.pk",
+ "fam.pk",
+ "biz.pk",
+ "web.pk",
+ "gov.pk",
+ "gob.pk",
+ "gok.pk",
+ "gon.pk",
+ "gop.pk",
+ "gos.pk",
+ "edu.ps",
+ "gov.ps",
+ "plo.ps",
+ "sec.ps",
+ "com.py",
+ "net.py",
+ "org.py",
+ "edu.py",
+ "com.qa",
+ "net.qa",
+ "org.qa",
+ "edu.qa",
+ "gov.qa",
+ "asso.re",
+ "com.re",
+ "nom.re",
+ "com.ru",
+ "net.ru",
+ "org.ru",
+ "pp.ru",
+ "com.sa",
+ "edu.sa",
+ "sch.sa",
+ "med.sa",
+ "gov.sa",
+ "net.sa",
+ "org.sa",
+ "pub.sa",
+ "com.sb",
+ "net.sb",
+ "org.sb",
+ "edu.sb",
+ "gov.sb",
+ "com.sd",
+ "net.sd",
+ "org.sd",
+ "edu.sd",
+ "sch.sd",
+ "med.sd",
+ "gov.sd",
+ "tm.se",
+ "press.se",
+ "parti.se",
+ "brand.se",
+ "fh.se",
+ "fhsk.se",
+ "fhv.se",
+ "komforb.se",
+ "kommunalforbund.se",
+ "komvux.se",
+ "lanarb.se",
+ "lanbib.se",
+ "naturbruksgymn.se",
+ "sshn.se",
+ "org.se",
+ "pp.se",
+ "com.sg",
+ "net.sg",
+ "org.sg",
+ "edu.sg",
+ "gov.sg",
+ "per.sg",
+ "com.sh",
+ "net.sh",
+ "org.sh",
+ "edu.sh",
+ "gov.sh",
+ "mil.sh",
+ "gov.st",
+ "saotome.st",
+ "principe.st",
+ "consulado.st",
+ "embaixada.st",
+ "org.st",
+ "edu.st",
+ "net.st",
+ "com.st",
+ "store.st",
+ "mil.st",
+ "co.st",
+ "com.sv",
+ "org.sv",
+ "edu.sv",
+ "gob.sv",
+ "red.sv",
+ "com.sy",
+ "net.sy",
+ "org.sy",
+ "gov.sy",
+ "ac.th",
+ "co.th",
+ "go.th",
+ "net.th",
+ "or.th",
+ "com.tn",
+ "net.tn",
+ "org.tn",
+ "edunet.tn",
+ "gov.tn",
+ "ens.tn",
+ "fin.tn",
+ "nat.tn",
+ "ind.tn",
+ "info.tn",
+ "intl.tn",
+ "rnrt.tn",
+ "rnu.tn",
+ "rns.tn",
+ "tourism.tn",
+ "com.tr",
+ "net.tr",
+ "org.tr",
+ "edu.tr",
+ "gov.tr",
+ "mil.tr",
+ "bbs.tr",
+ "k12.tr",
+ "gen.tr",
+ "co.tt",
+ "com.tt",
+ "org.tt",
+ "net.tt",
+ "biz.tt",
+ "info.tt",
+ "pro.tt",
+ "name.tt",
+ "gov.tt",
+ "edu.tt",
+ "nic.tt",
+ "us.tt",
+ "uk.tt",
+ "ca.tt",
+ "eu.tt",
+ "es.tt",
+ "fr.tt",
+ "it.tt",
+ "se.tt",
+ "dk.tt",
+ "be.tt",
+ "de.tt",
+ "at.tt",
+ "au.tt",
+ "co.tv",
+ "com.tw",
+ "net.tw",
+ "org.tw",
+ "edu.tw",
+ "idv.tw",
+ "gove.tw",
+ "com.ua",
+ "net.ua",
+ "org.ua",
+ "edu.ua",
+ "gov.ua",
+ "ac.ug",
+ "co.ug",
+ "or.ug",
+ "go.ug",
+ "co.uk",
+ "me.uk",
+ "org.uk",
+ "edu.uk",
+ "ltd.uk",
+ "plc.uk",
+ "net.uk",
+ "sch.uk",
+ "nic.uk",
+ "ac.uk",
+ "gov.uk",
+ "nhs.uk",
+ "police.uk",
+ "mod.uk",
+ "dni.us",
+ "fed.us",
+ "com.uy",
+ "edu.uy",
+ "net.uy",
+ "org.uy",
+ "gub.uy",
+ "mil.uy",
+ "com.ve",
+ "net.ve",
+ "org.ve",
+ "co.ve",
+ "edu.ve",
+ "gov.ve",
+ "mil.ve",
+ "arts.ve",
+ "bib.ve",
+ "firm.ve",
+ "info.ve",
+ "int.ve",
+ "nom.ve",
+ "rec.ve",
+ "store.ve",
+ "tec.ve",
+ "web.ve",
+ "co.vi",
+ "net.vi",
+ "org.vi",
+ "com.vn",
+ "biz.vn",
+ "edu.vn",
+ "gov.vn",
+ "net.vn",
+ "org.vn",
+ "int.vn",
+ "ac.vn",
+ "pro.vn",
+ "info.vn",
+ "health.vn",
+ "name.vn",
+ "com.vu",
+ "edu.vu",
+ "net.vu",
+ "org.vu",
+ "de.vu",
+ "ch.vu",
+ "fr.vu",
+ "com.ws",
+ "net.ws",
+ "org.ws",
+ "gov.ws",
+ "edu.ws",
+ "ac.yu",
+ "co.yu",
+ "edu.yu",
+ "org.yu",
+ "com.ye",
+ "net.ye",
+ "org.ye",
+ "gov.ye",
+ "edu.ye",
+ "mil.ye",
+ "ac.za",
+ "alt.za",
+ "bourse.za",
+ "city.za",
+ "co.za",
+ "edu.za",
+ "gov.za",
+ "law.za",
+ "mil.za",
+ "net.za",
+ "ngo.za",
+ "nom.za",
+ "org.za",
+ "school.za",
+ "tm.za",
+ "web.za",
+ "co.zw",
+ "ac.zw",
+ "org.zw",
+ "gov.zw",
+ "eu.org",
+ "au.com",
+ "br.com",
+ "cn.com",
+ "de.com",
+ "de.net",
+ "eu.com",
+ "gb.com",
+ "gb.net",
+ "hu.com",
+ "no.com",
+ "qc.com",
+ "ru.com",
+ "sa.com",
+ "se.com",
+ "uk.com",
+ "uk.net",
+ "us.com",
+ "uy.com",
+ "za.com",
+ "dk.org",
+ "tel.no",
+ "fax.nr",
+ "mob.nr",
+ "mobil.nr",
+ "mobile.nr",
+ "tel.nr",
+ "tlf.nr",
+ "e164.arpa"
+ };
+ debugOut("array size=" + tmp.length);
+ return tmp;
+ }
+
+ /**
+  * Prints a debug message to standard output when testing output is
+  * enabled; does nothing otherwise.
+  *
+  * @param msg the message to print
+  */
+ private static void debugOut(String msg) {
+     if (!testing) {
+         return;
+     }
+     System.out.println(msg);
+ }
+
+ /**
+  * Prints a Throwable to standard output when testing output is enabled;
+  * does nothing otherwise.
+  *
+  * @param th the Throwable to print
+  */
+ private static void debugOut(Throwable th) {
+     if (!testing) {
+         return;
+     }
+     System.out.println(th);
+ }
+}
+
+
+
Added: james/server/trunk/src/java/org/apache/james/util/urirbl/URIScanner.java
URL: http://svn.apache.org/viewvc/james/server/trunk/src/java/org/apache/james/util/urirbl/URIScanner.java?view=auto&rev=441631
==============================================================================
--- james/server/trunk/src/java/org/apache/james/util/urirbl/URIScanner.java (added)
+++ james/server/trunk/src/java/org/apache/james/util/urirbl/URIScanner.java Fri Sep 8 13:08:33 2006
@@ -0,0 +1,378 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+
+
+
+package org.apache.james.util.urirbl;
+
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.regex.*;
+import java.net.URI;
+
+/**
+ * Static helpers that scan text for URIs and e-mail addresses and reduce
+ * the hosts found in them to their registrar domains (or, for IP addresses,
+ * to the octets in reverse order).
+ */
+public class URIScanner {
+
+    /* These regular expressions "inspired" by Spamassassin */
+    static private final String reserved = ";/?:@&=+$,[]\\#|";
+
+    static private final String reservedNoColon = ";/?@&=+$,[]\\#|";
+
+    static private final String mark = "-_.!~*'()";
+
+    static private final String unreserved = "A-Za-z0-9" + escape(mark)
+            + "\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f";
+
+    static private final String uricSet = escape(reserved) + unreserved + "%";
+
+    static private final String uricNoColon = escape(reservedNoColon)
+            + unreserved + "%";
+
+    static private final String schemeRE = "(?-xism:(?:https?|ftp|mailto|javascript|file))";
+
+    static private final String schemelessRE = "(?-xism:(?<![.=])(?:(?i)www\\d*\\.|(?i)ftp\\.))";
+
+    static private final String uriRE = "(?-xism:\\b(?:" + schemeRE + ":["
+            + uricNoColon + "]|" + schemelessRE + ")[" + uricSet + "#]*)";
+
+    /** Pre-compiled pattern that matches URIs */
+    static private final Pattern uriPattern = Pattern.compile(uriRE);
+
+    /** Pre-compiled pattern that matches URI scheme strings */
+    static private final Pattern schemePattern = Pattern.compile("^" + schemeRE
+            + ":");
+
+    /** Pre-compiled pattern that matches a whole schemeless "www" URI string */
+    static private final Pattern wwwPrefixPattern = Pattern
+            .compile("^(?i)www\\d*\\..*");
+
+    /** Pre-compiled pattern that matches a whole schemeless "ftp" URI string */
+    static private final Pattern ftpPrefixPattern = Pattern
+            .compile("^(?i)ftp\\..*");
+
+    /** Pre-compiled pattern used to cleanup a found URI string */
+    static private final Pattern uriCleanup = Pattern.compile("^<(.*)>$");
+
+    /** Pre-compiled pattern used to cleanup a found URI string */
+    static private final Pattern uriCleanup2 = Pattern.compile("[\\]\\)>#]$");
+
+    /** Pre-compile pattern for identifying "mailto" patterns */
+    static private final Pattern uriCleanup3 = Pattern
+            .compile("^(?i)mailto:([^\\/]{2})(.*)$");
+
+    /* These regular expressions also "inspired" by Spamassassin */
+    static private final String esc = "\\\\";
+
+    static private final String period = "\\.";
+
+    static private final String space = "\\040";
+
+    static private final String open_br = "\\[";
+
+    static private final String close_br = "\\]";
+
+    static private final String nonASCII = "\\x80-\\xff";
+
+    static private final String ctrl = "\\000-\\037";
+
+    static private final String cr_list = "\\n\\015";
+
+    static private final String qtext = "[^" + esc + nonASCII + cr_list + "\"]";
+
+    static private final String dtext = "[^" + esc + nonASCII + cr_list
+            + open_br + close_br + "]";
+
+    static private final String quoted_pair = esc + "[^" + nonASCII + "]";
+
+    static private final String atom_char = "[^(" + space + ")<>@,;:\"." + esc
+            + open_br + close_br + ctrl + nonASCII + "]";
+
+    static private final String atom = "(?>" + atom_char + "+)";
+
+    static private final String quoted_str = "\"" + qtext + "*(?:"
+            + quoted_pair + qtext + "*)*\"";
+
+    static private final String word = "(?:" + atom + "|" + quoted_str + ")";
+
+    static private final String local_part = word + "(?:" + period + word
+            + ")*";
+
+    static private final String label = "[A-Za-z\\d](?:[A-Za-z\\d-]*[A-Za-z\\d])?";
+
+    static private final String domain_ref = label + "(?:" + period + label
+            + ")*";
+
+    static private final String domain_lit = open_br + "(?:" + dtext + "|"
+            + quoted_pair + ")*" + close_br;
+
+    static private final String domain = "(?:" + domain_ref + "|" + domain_lit
+            + ")";
+
+    static private final String Addr_spec_re = "(?-xism:" + local_part
+            + "\\s*\\@\\s*" + domain + ")";
+
+    /** Pre-compiled pattern for matching "schemeless" mailto strings */
+    static private final Pattern emailAddrPattern = Pattern
+            .compile(Addr_spec_re);
+
+    /** Simple regular expression to match an octet part of an IP address */
+    static private final String octet = "(?:[1-2][0-9][0-9])|(?:[1-9][0-9])|(?:[0-9])";
+
+    /** Simple regular expression to match a part of a domain string in the
+     TLDLookup cache. */
+    static private final String tld = "[A-Za-z0-9\\-]*";
+
+    /** Simple regular expression that matches a two-part TLD */
+    static private final String tld2 = tld + "\\." + tld;
+
+    /** Simple regular expression that matches a three-part TLD */
+    static private final String tld3 = tld + "\\." + tld + "\\." + tld;
+
+    /** Regular expression that matches and captures parts of a possible
+     one-part TLD domain string */
+    static private final String tldCap = "(" + tld + "\\.(" + tld + "))$";
+
+    /** Regular expression that matches and captures parts of a possible
+     two-part TLD domain string */
+    static private final String tld2Cap = "(" + tld + "\\.(" + tld2 + "))$";
+
+    /** Regular expression that matches and captures parts of a possible
+     three-part TLD domain string */
+    static private final String tld3Cap = "(" + tld + "\\.(" + tld3 + "))$";
+
+    /** Regular expression that matches and captures parts of an IP address */
+    static private final String ipCap = "((" + octet + ")\\.(" + octet
+            + ")\\.(" + octet + ")\\.(" + octet + "))$";
+
+    /** Pre-compiled pattern that matches IP addresses */
+    static private final Pattern ipCapPattern = Pattern.compile(ipCap);
+
+    /** Pre-compiled pattern that matches domain string that is possibly
+     contained in a one-part TLD */
+    static private final Pattern tldCapPattern = Pattern.compile(tldCap);
+
+    /** Pre-compiled pattern that matches domain string that is possibly
+     contained in a two-part TLD */
+    static private final Pattern tld2CapPattern = Pattern.compile(tld2Cap);
+
+    /** Pre-compiled pattern that matches domain string that is possibly
+     contained in a three-part TLD */
+    static private final Pattern tld3CapPattern = Pattern.compile(tld3Cap);
+
+    /** controls testing/debug output */
+    static private boolean testing = false;
+
+    /**
+     * Scans a character sequence for URIs. Then add all unique domain strings
+     * derived from those found URIs to the supplied HashSet.
+     * <p>
+     * This function calls scanContentForHosts() to grab all the host strings.
+     * Then it calls domainFromHost() on each host string found to distill them
+     * to their basic "registrar" domains.
+     *
+     * @param domains a HashSet to be populated with all domain strings found in
+     * the content
+     * @param content a character sequence to be scanned for URIs
+     */
+    static public void scanContentForDomains(HashSet domains,
+            CharSequence content) {
+        HashSet hosts = scanContentForHosts(content);
+        for (Iterator i = hosts.iterator(); i.hasNext();) {
+            String domain = domainFromHost((String) i.next());
+            if (null != domain) {
+                // a set ignores duplicates, no contains() check needed
+                domains.add(domain);
+            }
+        }
+    }
+
+    /**
+     * Scans a character sequence for URIs and for "schemeless" e-mail
+     * addresses. Then returns all unique host strings derived from them in a
+     * HashSet. Host strings are lower-cased with a fixed locale, so the result
+     * does not depend on the default locale of the JVM.
+     *
+     * @param content a character sequence to be scanned for URIs, may be
+     * <code>null</code>
+     * @return a HashSet containing host strings, empty if nothing was found or
+     * content was <code>null</code>
+     */
+    static protected HashSet scanContentForHosts(CharSequence content) {
+        HashSet set = new HashSet();
+        if (null == content) {
+            return set;
+        }
+        try {
+            // look for URIs
+            Matcher mat = uriPattern.matcher(content);
+            while (mat.find()) {
+                addHost(set, hostFromUriStr(normalizeUriStr(mat.group())));
+            }
+
+            // look for "schemeless" email addresses, too
+            mat = emailAddrPattern.matcher(content);
+            while (mat.find()) {
+                String found = mat.group();
+                debugOut("******** mailfound=\"" + found + "\"");
+                found = "mailto://" + found;
+                debugOut("*******6 mailfoundfound=\"" + found
+                        + "\" after cleanup 6");
+                addHost(set, hostFromUriStr(found));
+            }
+        } catch (Exception ex) {
+            // best effort: report the failure and return what was collected
+            // so far (this class has no logger, hence the stack trace)
+            debugOut(ex.toString());
+            ex.printStackTrace();
+        }
+        return set;
+    }
+
+    /**
+     * Cleans up a raw URI match so that java.net.URI can extract its host:
+     * strips enclosing angle brackets and one trailing bracket-like character,
+     * rewrites "mailto:" to "mailto://" and prefixes schemeless "www." and
+     * "ftp." strings with a scheme.
+     *
+     * @param found the text matched by the URI pattern
+     * @return the cleaned up URI string
+     */
+    private static String normalizeUriStr(String found) {
+        Matcher cleanMat = uriCleanup.matcher(found);
+        if (cleanMat.find()) {
+            found = cleanMat.group(1);
+        }
+        cleanMat = uriCleanup2.matcher(found);
+        if (cleanMat.find()) {
+            found = cleanMat.replaceAll("");
+        }
+        cleanMat = uriCleanup3.matcher(found);
+        if (cleanMat.find()) {
+            found = "mailto://" + cleanMat.group(1) + cleanMat.group(2);
+        }
+        if (!schemePattern.matcher(found).find()) {
+            // pre-compiled patterns instead of String.matches(), which would
+            // compile the expression again for every URI found
+            if (wwwPrefixPattern.matcher(found).matches()) {
+                found = "http://" + found;
+            } else if (ftpPrefixPattern.matcher(found).matches()) {
+                found = "ftp://" + found;
+            }
+        }
+        return found;
+    }
+
+    /**
+     * Adds the lower-cased form of a host string to a set. A
+     * <code>null</code> host is ignored.
+     *
+     * @param hosts the set to add to
+     * @param host the host string, may be <code>null</code>
+     */
+    private static void addHost(HashSet hosts, String host) {
+        if (null != host) {
+            // fixed locale: with the default locale an upper case 'I' would
+            // not become 'i' on e.g. a Turkish system
+            hosts.add(host.toLowerCase(java.util.Locale.ENGLISH));
+        }
+    }
+
+    /**
+     * Extracts and returns the host portion of URI string.
+     *
+     * This function uses java.net.URI.
+     *
+     * @param uriStr a string containing a URI, may be <code>null</code>
+     * @return the host portion of the supplied URI, null if no host string
+     * could be found or the string is not a valid URI
+     */
+    static protected String hostFromUriStr(String uriStr) {
+        debugOut("hostFromUriStr(\"" + uriStr + "\")");
+        if (null == uriStr) {
+            return null;
+        }
+        try {
+            return new URI(uriStr).getHost();
+        } catch (java.net.URISyntaxException ignored) {
+            // scanned text frequently is no valid URI; that just means that
+            // there is no host to report
+            return null;
+        }
+    }
+
+    /**
+     * Extracts and returns the registrar domain portion of a host string. This
+     * function checks all known multi-part TLDs to make sure that registrar
+     * domain is complete. For example, if the supplied host string is
+     * "subdomain.example.co.uk", the TLD is "co.uk" and not "uk". Therefore,
+     * the correct registrar domain is not "co.uk", but "example.co.uk". If the
+     * domain string is an IP address, then the octets are returned in reverse
+     * order.
+     *
+     * @param host a string containing a host name, may be <code>null</code>
+     * @return the registrar domain portion of the supplied host string, null
+     * if none could be derived
+     */
+    static protected String domainFromHost(String host) {
+        debugOut("domainFromHost(\"" + host + "\")");
+        if (null == host) {
+            return null;
+        }
+        try {
+            // IP addrs
+            Matcher mat = ipCapPattern.matcher(host);
+            if (mat.find()) {
+                // reverse the octets now
+                String reversed = mat.group(5) + "." + mat.group(4) + "."
+                        + mat.group(3) + "." + mat.group(2);
+                debugOut("domain=\"" + reversed + "\"");
+                return reversed;
+            }
+
+            // 3-part TLDs
+            mat = tld3CapPattern.matcher(host);
+            if (mat.find() && TLDLookup.isThreePartTLD(mat.group(2))) {
+                return capturedDomain(mat);
+            }
+
+            // 2-part TLDs
+            mat = tld2CapPattern.matcher(host);
+            if (mat.find() && TLDLookup.isTwoPartTLD(mat.group(2))) {
+                return capturedDomain(mat);
+            }
+
+            // 1-part TLDs
+            mat = tldCapPattern.matcher(host);
+            if (mat.find()) {
+                return capturedDomain(mat);
+            }
+        } catch (Exception ex) {
+            // best effort: report the failure and give up on this host
+            debugOut(ex.toString());
+            ex.printStackTrace();
+        }
+        return null;
+    }
+
+    /**
+     * Returns the domain captured by one of the TLD patterns (group 1) and
+     * writes it, together with the captured TLD (group 2), to the debug
+     * output.
+     *
+     * @param mat a matcher of a TLD pattern that already found a match
+     * @return the captured domain string
+     */
+    private static String capturedDomain(Matcher mat) {
+        String found = mat.group(1);
+        debugOut("domain=\"" + found + "\", tld=\"" + mat.group(2) + "\"");
+        return found;
+    }
+
+    /**
+     * Debugging output, only written while the testing flag is set
+     *
+     * @param msg the text to print on standard output
+     */
+    private static void debugOut(String msg) {
+        if (testing) {
+            System.out.println(msg);
+        }
+    }
+
+    /**
+     * A utility function that "escapes" special characters in a string.
+     * Every character that is not a digit, a letter or an underscore gets a
+     * backslash in front of it.
+     *
+     * @param str a string to be processed
+     * @return modified "escaped" string
+     */
+    private static String escape(String str) {
+        StringBuffer buffer = new StringBuffer();
+        for (int i = 0; i < str.length(); i++) {
+            char ch = str.charAt(i);
+            boolean plain = Character.isDigit(ch) || Character.isUpperCase(ch)
+                    || Character.isLowerCase(ch) || ch == '_';
+            if (!plain) {
+                buffer.append("\\");
+            }
+            buffer.append(ch);
+        }
+        return buffer.toString();
+    }
+}
Added: james/server/trunk/src/test/org/apache/james/smtpserver/URIRBLHandlerTest.java
URL: http://svn.apache.org/viewvc/james/server/trunk/src/test/org/apache/james/smtpserver/URIRBLHandlerTest.java?view=auto&rev=441631
==============================================================================
--- james/server/trunk/src/test/org/apache/james/smtpserver/URIRBLHandlerTest.java (added)
+++ james/server/trunk/src/test/org/apache/james/smtpserver/URIRBLHandlerTest.java Fri Sep 8 13:08:33 2006
@@ -0,0 +1,209 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+
+package org.apache.james.smtpserver;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import javax.mail.MessagingException;
+import javax.mail.internet.MimeMessage;
+
+import junit.framework.TestCase;
+
+import org.apache.avalon.framework.container.ContainerUtil;
+import org.apache.james.services.DNSServer;
+import org.apache.james.smtpserver.core.filter.fastfail.URIRBLHandler;
+import org.apache.james.test.mock.avalon.MockLogger;
+import org.apache.james.test.mock.javaxmail.MockMimeMessage;
+import org.apache.james.test.mock.mailet.MockMail;
+import org.apache.mailet.Mail;
+
+/**
+ * Tests for URIRBLHandler that run the handler against a mocked SMTP
+ * session and a mocked DNS server.
+ */
+public class URIRBLHandlerTest extends TestCase {
+    private static final String BAD_DOMAIN1 = "bad.domain.multi.surbl.org";
+    private static final String BAD_DOMAIN2 = "bad2.domain.multi.surbl.org";
+    private static final String GOOD_DOMAIN = "good.domain.multi.surbl.org";
+    private SMTPSession mockedSMTPSession;
+
+    /** Last string the mocked session was asked to write, null if none. */
+    private String response = null;
+
+    public void setUp() {
+        // forget the response captured by a previous test
+        response = null;
+    }
+
+    /**
+     * Creates the mocked SMTP session. The session hands out the given mail
+     * and stores every written response in the response field of this test.
+     *
+     * @param mail the mail the session returns from getMail()
+     * @return the mocked session, which is also kept in mockedSMTPSession
+     */
+    private SMTPSession setupMockedSMTPSession(final Mail mail) {
+        mockedSMTPSession = new AbstractSMTPSession() {
+
+            private HashMap state = new HashMap();
+
+            private String ipAddress = "192.168.0.1";
+
+            private String host = "localhost";
+
+            private boolean relayingAllowed;
+
+            public void abortMessage() {
+            }
+
+            public Mail getMail() {
+                return mail;
+            }
+
+            public String getRemoteHost() {
+                return host;
+            }
+
+            public String getRemoteIPAddress() {
+                return ipAddress;
+            }
+
+            public Map getState() {
+                // the sender entry is (re-)added on every call
+                state.put(SMTPSession.SENDER, "sender@james.apache.org");
+                return state;
+            }
+
+            public boolean isRelayingAllowed() {
+                return relayingAllowed;
+            }
+
+            public void setRelayingAllowed(boolean relayingAllowed) {
+                this.relayingAllowed = relayingAllowed;
+            }
+
+            public void writeResponse(String respString) {
+                response = respString;
+            }
+        };
+
+        return mockedSMTPSession;
+    }
+
+    private String getResponse() {
+        return response;
+    }
+
+    /**
+     * Wraps a message in a MockMail.
+     *
+     * @param message the message of the mail
+     * @return the mocked mail
+     */
+    private Mail setupMockedMail(MimeMessage message) {
+        MockMail mockedMail = new MockMail();
+        mockedMail.setMessage(message);
+        return mockedMail;
+    }
+
+    /**
+     * Builds a message with the given text as content.
+     *
+     * @param text the content of the message
+     * @return the message, with changes saved
+     * @throws MessagingException if the message could not be set up
+     */
+    public MimeMessage setupMockedMimeMessage(String text)
+            throws MessagingException {
+        MimeMessage mimeMessage = new MimeMessage(new MockMimeMessage());
+        mimeMessage.setText(text);
+        mimeMessage.saveChanges();
+        return mimeMessage;
+    }
+
+    /**
+     * Setup the mocked dnsserver. Only findTXTRecords() and getByName() are
+     * implemented, the other methods throw UnsupportedOperationException.
+     */
+    private DNSServer setupMockedDnsServer() {
+        return new DNSServer() {
+
+            public Collection findMXRecords(String hostname) {
+                throw new UnsupportedOperationException("Unimplemented in mock");
+            }
+
+            public Collection findTXTRecords(String hostname) {
+                List records = new ArrayList();
+                // equals() is false for a null hostname, so that case ends
+                // up with the empty list as well
+                if (BAD_DOMAIN1.equals(hostname)) {
+                    records.add("Blocked - see http://www.surbl.org");
+                }
+                return records;
+            }
+
+            public Iterator getSMTPHostAddresses(String domainName) {
+                throw new UnsupportedOperationException("Unimplemented in mock");
+            }
+
+            public InetAddress[] getAllByName(String host)
+                    throws UnknownHostException {
+                throw new UnsupportedOperationException("Unimplemented in mock");
+            }
+
+            public InetAddress getByName(String host)
+                    throws UnknownHostException {
+                if (BAD_DOMAIN1.equals(host) || BAD_DOMAIN2.equals(host)) {
+                    return InetAddress.getByName("127.0.0.1");
+                }
+                if (GOOD_DOMAIN.equals(host)) {
+                    return InetAddress.getByName("fesdgaeg.deger");
+                }
+                // every other name is passed on to InetAddress
+                return InetAddress.getByName(host);
+            }
+        };
+    }
+
+    /**
+     * Runs a new URIRBLHandler, configured with the mocked DNS server and the
+     * URI RBL server "multi.surbl.org", on a mail that has the given text.
+     *
+     * @param mailText the content of the mail given to the handler
+     */
+    private void runHandler(String mailText) throws IOException,
+            MessagingException {
+        ArrayList rblServers = new ArrayList();
+        rblServers.add("multi.surbl.org");
+
+        SMTPSession smtpSession = setupMockedSMTPSession(setupMockedMail(setupMockedMimeMessage(mailText)));
+
+        URIRBLHandler uriRblHandler = new URIRBLHandler();
+
+        ContainerUtil.enableLogging(uriRblHandler, new MockLogger());
+        uriRblHandler.setDnsServer(setupMockedDnsServer());
+        uriRblHandler.setUriRblServer(rblServers);
+        uriRblHandler.onMessage(smtpSession);
+    }
+
+    public void testNotBlocked() throws IOException, MessagingException {
+        runHandler("http://" + GOOD_DOMAIN + "/");
+
+        assertNull("Email was not rejected", getResponse());
+    }
+
+    public void testBlocked() throws IOException, MessagingException {
+        runHandler("http://" + BAD_DOMAIN1 + "/");
+
+        // NOTE(review): this expects that NO response was written, although
+        // the test name and BAD_DOMAIN1 suggest that a rejection is meant to
+        // be verified - confirm against URIRBLHandler.onMessage().
+        assertNull("Email was rejected", getResponse());
+    }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org