You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@roller.apache.org by ag...@apache.org on 2005/09/25 09:07:31 UTC

svn commit: r291379 - in /incubator/roller/branches/roller_2.0: src/org/roller/presentation/BlacklistUpdateTask.java src/org/roller/util/Blacklist.java src/org/roller/util/CommentSpamChecker.java tests/org/roller/BlacklistTest.java

Author: agilliland
Date: Sun Sep 25 00:07:20 2005
New Revision: 291379

URL: http://svn.apache.org/viewcvs?rev=291379&view=rev
Log:
updated MT Blacklist class.  basically just reworked the methods that deal with
downloading and reading in the file, plus added some debugging.


Modified:
    incubator/roller/branches/roller_2.0/src/org/roller/presentation/BlacklistUpdateTask.java
    incubator/roller/branches/roller_2.0/src/org/roller/util/Blacklist.java
    incubator/roller/branches/roller_2.0/src/org/roller/util/CommentSpamChecker.java
    incubator/roller/branches/roller_2.0/tests/org/roller/BlacklistTest.java

Modified: incubator/roller/branches/roller_2.0/src/org/roller/presentation/BlacklistUpdateTask.java
URL: http://svn.apache.org/viewcvs/incubator/roller/branches/roller_2.0/src/org/roller/presentation/BlacklistUpdateTask.java?rev=291379&r1=291378&r2=291379&view=diff
==============================================================================
--- incubator/roller/branches/roller_2.0/src/org/roller/presentation/BlacklistUpdateTask.java (original)
+++ incubator/roller/branches/roller_2.0/src/org/roller/presentation/BlacklistUpdateTask.java Sun Sep 25 00:07:20 2005
@@ -4,7 +4,8 @@
 package org.roller.presentation;
 
 import java.util.TimerTask;
-
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.roller.RollerException;
 import org.roller.model.Roller;
 import org.roller.model.RollerFactory;
@@ -12,21 +13,46 @@
 import org.roller.util.Blacklist;
 
 /**
- * @author lance.lavandowska
+ * Update MT Blacklist if needed.
+ *
+ * @author Allen Gilliland
  */
-public class BlacklistUpdateTask extends TimerTask implements ScheduledTask
-{
-    public void run() 
-    {
-        // try reading new def from URL
-        Blacklist.checkForUpdate();
+public class BlacklistUpdateTask extends TimerTask implements ScheduledTask {
+    
+    private static Log mLogger = LogFactory.getLog(BlacklistUpdateTask.class);
+    
+    
+    /**
+     * Task init.
+     */
+    public void init(Roller roller, String realPath) throws RollerException {
+        mLogger.debug("initing");
     }
-    public void init(Roller roller, String realPath) throws RollerException
-    {
-        // load Blacklist from file
-        String uploadDir = RollerFactory.getRoller().getFileManager().getUploadDir();
-        Blacklist.getBlacklist(null, uploadDir);
-        // now have it check for an update
+    
+    
+    /**
+     * Execute the task.
+     */
+    public void run() {
+        
+        mLogger.info("task started");
+
         Blacklist.checkForUpdate();
+        
+        mLogger.info("task completed");
+    }
+    
+    
+    /**
+     * Main method so that this task may be run from outside the webapp.
+     */
+    public static void main(String[] args) throws Exception {
+        
+        // NOTE: if this task is run externally from the Roller webapp then
+        // all it will really be doing is downloading the MT blacklist file
+        BlacklistUpdateTask task = new BlacklistUpdateTask();
+        task.init(null, null);
+        task.run();
     }
+    
 }

Modified: incubator/roller/branches/roller_2.0/src/org/roller/util/Blacklist.java
URL: http://svn.apache.org/viewcvs/incubator/roller/branches/roller_2.0/src/org/roller/util/Blacklist.java?rev=291379&r1=291378&r2=291379&view=diff
==============================================================================
--- incubator/roller/branches/roller_2.0/src/org/roller/util/Blacklist.java (original)
+++ incubator/roller/branches/roller_2.0/src/org/roller/util/Blacklist.java Sun Sep 25 00:07:20 2005
@@ -3,10 +3,8 @@
  */
 package org.roller.util;
 
-import org.roller.util.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-
 import java.io.BufferedReader;
 import java.io.FileInputStream;
 import java.io.FileWriter;
@@ -14,9 +12,8 @@
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.File;
-import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
 import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
 import java.net.URL;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
@@ -26,197 +23,241 @@
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import org.roller.config.RollerConfig;
+
 
 /**
- * Based on the list provided by Jay Allen for
- * MT-Blacklist:
- * http://www.jayallen.org/projects/mt-blacklist/
- * 
+ * Based on the list provided by Jay Allen for MT-Blacklist:
+ *  http://www.jayallen.org/projects/mt-blacklist/
+ *
  * Will provide response whether submitted string
  * contains an item listed in the supplied blacklist.
  * This implementation does not do everything
  * MT-Blacklist does, such as the "Search & De-spam mode".
- * 
+ *
  * @author lance
+ * @author Allen Gilliland
  */
-public class Blacklist
-{
+public class Blacklist {
+    
     private static Log mLogger = LogFactory.getLog(Blacklist.class);
-
-    private static Blacklist blacklist;
     
-    public  static final String blacklistFile = "blacklist.txt";
-    private static final String blacklistURL = "http://www.jayallen.org/comment_spam/blacklist.txt";
+    private static final String blacklistFile = "blacklist.txt";
+    private static final String blacklistURL = 
+            "http://www.jayallen.org/comment_spam/blacklist.txt";
     private static final String lastUpdateStr = "Last update:";
 
-    // Default location of blacklist file (relative to realPath) in case that uploadDir is null or empty
-    // and realPath is non-null.
-    private static final String DEFAULT_BLACKLIST_DIR = "resources";
-    private String realPath;
-    private String uploadDir;
-
+    private static Blacklist blacklist;
+    
+    // Non-Static attributes
+    private String uploadDir = null;
+    private Date lastModified = null;
     private List blacklistStr = new LinkedList();
     private List blacklistRegex = new LinkedList();
     
-    private Date ifModifiedSince = null;
+    
+    // setup our singleton at class loading time
+    static {
+        mLogger.info("Initializing MT Blacklist");
+        
+        blacklist = new Blacklist();
+        blacklist.loadBlacklistFromFile();
+    }
 
+    
+    /**
+     * Hide constructor
+     */
+    private Blacklist() {
+        this.uploadDir = RollerConfig.getProperty("uploads.dir");
+    }
+    
+    
     /**
      * Singleton factory method.
      */
-    public static Blacklist getBlacklist(String realPath, String uploadDir)
-    {
-        if (blacklist == null)
-        {
-            Blacklist temp = new Blacklist(realPath, uploadDir);
-            temp.extractFromFile();
-            blacklist = temp;
-        }
+    public static Blacklist getBlacklist() {
         return blacklist;
     }
     
+    
     /**
-     * This will try to download a new set of Blacklist
-     * rules.  If no change has occurred then return
-     * current Blacklist.
-     * 
-     * @return New Blacklist if rules have changed,
-     * otherwise return current Blacklist.
-     */
-    public static void checkForUpdate()
-    {
-        blacklist = blacklist.extractFromURL();
+     * Update the MT blacklist if necessary.
+     */
+    public static void checkForUpdate() {
+        getBlacklist().update();
     }
-
+    
+    
     /**
-     * Hide constructor
+     * Non-Static update method.
      */
-    private Blacklist(String realPath, String uploadDir)
-    {
-        this.realPath = realPath;
-        this.uploadDir = uploadDir;
+    public void update() {
+        boolean blacklist_updated = this.downloadBlacklist();
+        
+        if(blacklist_updated)
+            this.loadBlacklistFromFile();
     }
     
+    
     /**
-     * Read a local file for Blacklist rules.
+     * Download the MT blacklist from the web to our uploads directory.
      */
-    private void extractFromFile()
-    {
-        InputStream txtStream = getFileInputStream();
-        if (txtStream != null)
-        {
+    private boolean downloadBlacklist() {
+        
+        boolean blacklist_updated = false;
+        InputStream instream = null;
+        FileOutputStream outstream = null;
+        try {
+            mLogger.debug("attempting to download MT blacklist");
+            
+            URL url = new URL(blacklistURL);
+            HttpURLConnection connection = 
+                    (HttpURLConnection) url.openConnection();
+            
+            // after spending way too much time debugging i've discovered
+            // that the blacklist server is selective based on the User-Agent
+            // header.  without this header set i always get a 403 response :(
+            connection.setRequestProperty("User-Agent", "Mozilla/5.0");
+            
+            if (this.lastModified != null) {
+                connection.setRequestProperty("If-Modified-Since",
+                        DateUtil.formatRfc822(this.lastModified));
+            }
+            
+            int responseCode = connection.getResponseCode();
+            mLogger.debug("HttpConnection response = "+responseCode);
+            
+            // did the connection return NotModified? If so, no need to parse
+            if (responseCode == HttpURLConnection.HTTP_NOT_MODIFIED) {
+                mLogger.debug("MT blacklist site says we are current");
+                return false;
+            }
+            
+            // did the connection return a LastModified header? (-1 if not)
+            long lastModifiedLong = 
+                    connection.getHeaderFieldDate("Last-Modified", -1);
+            
+            // if the file is newer than our current then we need to update it
+            if (responseCode == HttpURLConnection.HTTP_OK &&
+                    (this.lastModified == null ||
+                    this.lastModified.getTime() < lastModifiedLong)) {
+                
+                // lastModified may be null in this branch, so guard getTime()
+                mLogger.debug("my last modified = "+(this.lastModified == null
+                        ? "null" : String.valueOf(this.lastModified.getTime())));
+                mLogger.debug("MT last modified = "+lastModifiedLong);
+                
+                // save the new blacklist
+                instream = connection.getInputStream();
+                String path = this.uploadDir + File.separator + blacklistFile;
+                outstream = new FileOutputStream(path);
+                mLogger.debug("writing updated MT blacklist to "+path);
+                
+                // read from url and write to file; read() gives -1 at the end
+                byte[] buf = new byte[4096];
+                int length = 0;
+                while((length = instream.read(buf)) != -1)
+                    outstream.write(buf, 0, length);
+                // close explicitly so a failed close counts as a failed download
+                outstream.close();
+                blacklist_updated = true;
+                mLogger.debug("MT blacklist download completed.");
+            } else {
+                mLogger.debug("blacklist *NOT* saved, assuming we are current");
+            }
+        } catch (Exception e) {
+            mLogger.error("error downloading blacklist", e);
+        } finally {
+            // always release both streams, even if the copy failed part way
+            try { if (outstream != null) outstream.close(); } catch (IOException e) { mLogger.error(e); }
+            try { if (instream != null) instream.close(); } catch (IOException e) { mLogger.error(e); }
+        }
+        
+        return blacklist_updated;
+    }
+    
+    
+    /**
+     * Load the MT blacklist from the file system.
+     *
+     * We look for a previously downloaded version of the blacklist first and
+     * if it's not found then we load the default blacklist packed with Roller.
+     */
+    private void loadBlacklistFromFile() {
+        
+        InputStream txtStream = null;
+        try {
+            String path = this.uploadDir + File.separator + blacklistFile;
+            File blacklistFile = new File(path);
+            
+            // check our lastModified date to see if we need to re-read the file
+            if(this.lastModified != null &&
+                    this.lastModified.getTime() >= blacklistFile.lastModified()) {
+                
+                mLogger.debug("Blacklist is current, no need to load again");
+                return;
+            } else {
+                this.lastModified = new Date(blacklistFile.lastModified());
+            }
+            
+            txtStream = new FileInputStream(blacklistFile);
+            
+            mLogger.debug("Loading blacklist from "+path);
+        } catch (Exception e) {
+            // Roller keeps a copy in the webapp just in case
+            txtStream = getClass().getResourceAsStream("/"+blacklistFile);
+            
+            mLogger.debug("Couldn't find downloaded blacklist, "+
+                    "loading from classpath instead");
+        }
+        
+        if (txtStream != null) {
             readFromStream(txtStream, false);
+        } else {
+            mLogger.error("couldn't load a blacklist file from anywhere, "+
+                    "this means blacklist checking is disabled for now.");
         }
-        else
-        {
-            throw new NullPointerException("Unable to load blacklist.txt.  " +
-            "Make sure blacklist.txt is in classpath.");
-        }    
     }
-
+    
+    
     /**
      * Read in the InputStream for rules.
      * @param txtStream
      */
-    private String readFromStream(InputStream txtStream, boolean saveStream)
-    {
+    private String readFromStream(InputStream txtStream, boolean saveStream) {
         String line;
         StringBuffer buf = new StringBuffer();
         BufferedReader in = null;
-        try
-        {
-            in = new BufferedReader( 
-                new InputStreamReader( txtStream, "UTF-8" ) );
-            while ((line = in.readLine()) != null)
-            {
-                if (line.startsWith("#"))
-                {
+        try {
+            in = new BufferedReader(
+                    new InputStreamReader( txtStream, "UTF-8" ) );
+            while ((line = in.readLine()) != null) {
+                if (line.startsWith("#")) {
                     readComment(line);
-                }
-                else
-                {
+                } else {
                     readRule(line);
                 }
                 
                 if (saveStream) buf.append(line).append("\n");
             }
-        }
-        catch (Exception e)
-        {
+        } catch (Exception e) {
             mLogger.error(e);
-        }
-        finally
-        {
-           try
-            {
-                 if (in != null) in.close();
-            }
-            catch (IOException e1)
-            {
+        } finally {
+            try {
+                if (in != null) in.close();
+            } catch (IOException e1) {
                 mLogger.error(e1);
             }
         }
         return buf.toString();
     }
     
-    /**
-     * Connect to the web for blacklist.  Check to
-     * see if a newer version exists before parsing.
-     */
-    private Blacklist extractFromURL()
-    {
-        // now see if we can update it from the web
-        Blacklist oldBlacklist = getBlacklist(realPath, uploadDir);
-        Blacklist newBlacklist = new Blacklist(realPath, uploadDir);
-        try
-        {
-            URL url = new URL(blacklistURL);
-            HttpURLConnection connection = (HttpURLConnection)url.openConnection();
-            if (oldBlacklist.ifModifiedSince != null)
-            {
-                connection.setRequestProperty("If-Modified-Since",
-                                              DateUtil.formatRfc822(oldBlacklist.ifModifiedSince));
-            }
-
-            // did the connection return NotModified? If so, no need to parse
-            if ( connection.getResponseCode() == HttpURLConnection.HTTP_NOT_MODIFIED)
-            {
-                // we already have a current blacklist
-                return oldBlacklist;
-            }
-
-            // did the connection return a LastModified header?
-            long lastModifiedLong = connection.getHeaderFieldDate("Last-Modified", -1);
-
-            // if no ifModifiedSince, or lastModifiedLong is newer, then read stream
-            if (oldBlacklist.ifModifiedSince == null ||
-                oldBlacklist.ifModifiedSince.getTime() < lastModifiedLong)
-            {
-                String results = newBlacklist.readFromStream( connection.getInputStream(), true );
-
-                // save the new blacklist
-                newBlacklist.writeToFile(results);
-
-                if (newBlacklist.ifModifiedSince == null && lastModifiedLong != -1)
-                {
-                    newBlacklist.ifModifiedSince = new Date(lastModifiedLong);
-                }
-
-                return newBlacklist;
-            }
-        }
-        catch (Exception e)
-        {
-            // Catch all exceptions and just log at INFO (should this be WARN?) without a full stacktrace.
-            mLogger.info("Roller Blacklist Update: Unable to update comment spam blacklist due to exception: " + e);
-        }
-        return oldBlacklist;
-    }
-
+    
     /**
      * @param str
      */
-    private void readRule(String str)
-    {
+    private void readRule(String str) {
         if (StringUtils.isEmpty(str)) return; // bad condition
         
         String rule = str.trim();
@@ -231,49 +272,43 @@
         {
             // pre-compile patterns since they will be frequently used
             blacklistRegex.add(Pattern.compile(rule));
-        }
-        else if (StringUtils.isNotEmpty(rule))
-        {    
+        } else if (StringUtils.isNotEmpty(rule)) {
             blacklistStr.add(rule);
         }
     }
-
+    
+    
     /**
      * Try to parse out "Last update" value: 2004/03/08 23:17:30.
      * @param str
      */
-    private void readComment(String str)
-    {
+    private void readComment(String str) {
         int lastUpdatePos = str.indexOf(lastUpdateStr);
-        if (lastUpdatePos > -1)
-        {
+        if (lastUpdatePos > -1) {
             str = str.substring(lastUpdatePos + lastUpdateStr.length());
             str = str.trim();
-            try
-            {
+            try {
                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
-                ifModifiedSince = DateUtil.parse(str, sdf);
-            }
-            catch (ParseException e)
-            {
+                lastModified = DateUtil.parse(str, sdf);
+            } catch (ParseException e) {
                 mLogger.debug("ParseException reading " + str);
             }
         }
     }
-
+    
+    
     /**
      * Does the String argument match any of the rules in the blacklist?
-     * 
+     *
      * @param str
      * @return
      */
-    public boolean isBlacklisted(String str)
-    {
+    public boolean isBlacklisted(String str) {
         if (str == null || StringUtils.isEmpty(str)) return false;
         
         // First iterate over blacklist, doing indexOf.
         // Then iterate over blacklistRegex and test.
-        // As soon as there is a hit in either case return true 
+        // As soon as there is a hit in either case return true
         
         // test plain String.indexOf
         if( testStringRules(str) ) return true;
@@ -282,37 +317,31 @@
         return testRegExRules(str);
     }
     
+    
     /**
      * Test String against the RegularExpression rules.
-     * 
+     *
      * @param str
      * @return
      */
-    private boolean testRegExRules(String str)
-    {
+    private boolean testRegExRules(String str) {
         boolean hit = false;
         Pattern testPattern = null;
         Iterator iter = blacklistRegex.iterator();
-        while (iter.hasNext())
-        {
+        while (iter.hasNext()) {
             testPattern = (Pattern)iter.next();
             
             // want to see what it is matching on
             // if we are in "debug mode"
-            if (mLogger.isDebugEnabled())
-            {
+            if (mLogger.isDebugEnabled()) {
                 Matcher matcher = testPattern.matcher(str);
-                if (matcher.find())
-                {
+                if (matcher.find()) {
                     mLogger.debug(matcher.group() + " matched by " + testPattern.pattern());
                     hit = true;
                     break;
                 }
-            }
-            else
-            {
-                if (testPattern.matcher(str).find())
-                {
+            } else {
+                if (testPattern.matcher(str).find()) {
                     hit = true;
                     break;
                 }
@@ -320,28 +349,25 @@
         }
         return hit;
     }
-
+    
+    
     /**
      * Test the String against the String rules,
      * using simple indexOf.
-     * 
+     *
      * @param str
      * @return
      */
-    private boolean testStringRules(String str)
-    {
+    private boolean testStringRules(String str) {
         String test;
         Iterator iter = blacklistStr.iterator();
         boolean hit = false;
-        while (iter.hasNext())
-        {
+        while (iter.hasNext()) {
             test = (String)iter.next();
             //System.out.println("check against |" + test + "|");
-            if (str.indexOf(test) > -1)
-            {
+            if (str.indexOf(test) > -1) {
                 // want to see what it is matching on
-                if (mLogger.isDebugEnabled())
-                {
+                if (mLogger.isDebugEnabled()) {
                     mLogger.debug("matched:" + test + ":");
                 }
                 hit = true;
@@ -351,87 +377,11 @@
         return hit;
     }
     
-    /**
-     * Try reading blacklist.txt from wherever RollerConfig.getUploadDir()
-     * is, otherwise try loading it from web resource (/WEB-INF/).
-     */
-    private InputStream getFileInputStream()
-    {
-        try
-        {
-            // TODO: clean up
-            // This was previously throwing an NPE to get to the exception case 
-            // when being called in several places with indexDir==null. 
-            // This is just about as bad; it needs to be cleaned up.
-            String path = getBlacklistFilePath();
-            if (path == null)
-            {
-                throw new FileNotFoundException(
-                        "null path (indexDir and realPath both null)");
-            }
-            return new FileInputStream( path );
-        }
-        catch (Exception e)
-        {
-            return getClass().getResourceAsStream("/"+blacklistFile);
-        }
-    }
-
-    /**
-     * @param results
-     */
-    private void writeToFile(String results)
-    {
-        FileWriter out = null;
-        String path = getBlacklistFilePath();
-        if (path == null)
-        {
-            mLogger.debug("Not writing blacklist file since directory paths were null.");
-            return;
-        }
-        try
-        {
-            // attempt writing results
-            out = new FileWriter(path);
-            out.write( results.toCharArray() );
-        }
-        catch (Exception e)
-        {
-            mLogger.info("Unable to write new " + path);
-        }
-        finally
-        {
-            try
-            {
-                if (out != null) out.close();
-            }
-            catch (IOException e)
-            {
-                mLogger.error("Unable to close stream to " + path);
-            }
-        }
-    }
-
-    // Added for ROL-612 - TODO: Consider refactoring - nearly duplicate code in FileManagerImpl.
-    private String getBlacklistFilePath() 
-    {
-        if (uploadDir == null && realPath==null) 
-        {
-            // to preserve existing behavior forced to interpret this differently
-            return null;
-        }
-        if (uploadDir == null || uploadDir.trim().length() == 0) 
-        {
-            uploadDir = realPath + File.separator + DEFAULT_BLACKLIST_DIR;
-        }
-        return uploadDir + File.separator + blacklistFile;
-    }
-
+    
     /**
      * Return pretty list of String and RegEx rules.
      */
-    public String toString()
-    {
+    public String toString() {
         StringBuffer buf = new StringBuffer("blacklist ");
         buf.append(blacklistStr).append("\n");
         buf.append("Regex blacklist ").append(blacklistRegex);

Modified: incubator/roller/branches/roller_2.0/src/org/roller/util/CommentSpamChecker.java
URL: http://svn.apache.org/viewcvs/incubator/roller/branches/roller_2.0/src/org/roller/util/CommentSpamChecker.java?rev=291379&r1=291378&r2=291379&view=diff
==============================================================================
--- incubator/roller/branches/roller_2.0/src/org/roller/util/CommentSpamChecker.java (original)
+++ incubator/roller/branches/roller_2.0/src/org/roller/util/CommentSpamChecker.java Sun Sep 25 00:07:20 2005
@@ -14,7 +14,7 @@
 public class CommentSpamChecker
 {
     private static Log mLogger = LogFactory.getLog(CommentSpamChecker.class);
-    private Blacklist blacklist = Blacklist.getBlacklist(null,null);
+    private Blacklist blacklist = Blacklist.getBlacklist();
 
     // -----------------------------------------------------------------------
     /**

Modified: incubator/roller/branches/roller_2.0/tests/org/roller/BlacklistTest.java
URL: http://svn.apache.org/viewcvs/incubator/roller/branches/roller_2.0/tests/org/roller/BlacklistTest.java?rev=291379&r1=291378&r2=291379&view=diff
==============================================================================
--- incubator/roller/branches/roller_2.0/tests/org/roller/BlacklistTest.java (original)
+++ incubator/roller/branches/roller_2.0/tests/org/roller/BlacklistTest.java Sun Sep 25 00:07:20 2005
@@ -39,7 +39,7 @@
     protected void setUp() throws Exception
     {
         super.setUp();
-        blacklist = Blacklist.getBlacklist(null,null);
+        blacklist = Blacklist.getBlacklist();
     }
 
     /**