You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@roller.apache.org by ag...@apache.org on 2005/09/25 09:07:31 UTC
svn commit: r291379 - in /incubator/roller/branches/roller_2.0:
src/org/roller/presentation/BlacklistUpdateTask.java
src/org/roller/util/Blacklist.java
src/org/roller/util/CommentSpamChecker.java
tests/org/roller/BlacklistTest.java
Author: agilliland
Date: Sun Sep 25 00:07:20 2005
New Revision: 291379
URL: http://svn.apache.org/viewcvs?rev=291379&view=rev
Log:
updated MT Blacklist class. basically just reworked the methods that deal with
downloading and reading in the file, plus added some debugging.
Modified:
incubator/roller/branches/roller_2.0/src/org/roller/presentation/BlacklistUpdateTask.java
incubator/roller/branches/roller_2.0/src/org/roller/util/Blacklist.java
incubator/roller/branches/roller_2.0/src/org/roller/util/CommentSpamChecker.java
incubator/roller/branches/roller_2.0/tests/org/roller/BlacklistTest.java
Modified: incubator/roller/branches/roller_2.0/src/org/roller/presentation/BlacklistUpdateTask.java
URL: http://svn.apache.org/viewcvs/incubator/roller/branches/roller_2.0/src/org/roller/presentation/BlacklistUpdateTask.java?rev=291379&r1=291378&r2=291379&view=diff
==============================================================================
--- incubator/roller/branches/roller_2.0/src/org/roller/presentation/BlacklistUpdateTask.java (original)
+++ incubator/roller/branches/roller_2.0/src/org/roller/presentation/BlacklistUpdateTask.java Sun Sep 25 00:07:20 2005
@@ -4,7 +4,8 @@
package org.roller.presentation;
import java.util.TimerTask;
-
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.roller.RollerException;
import org.roller.model.Roller;
import org.roller.model.RollerFactory;
@@ -12,21 +13,46 @@
import org.roller.util.Blacklist;
/**
- * @author lance.lavandowska
+ * Update MT Blacklist if needed.
+ *
+ * @author Allen Gilliland
*/
-public class BlacklistUpdateTask extends TimerTask implements ScheduledTask
-{
- public void run()
- {
- // try reading new def from URL
- Blacklist.checkForUpdate();
+public class BlacklistUpdateTask extends TimerTask implements ScheduledTask {
+
+ private static Log mLogger = LogFactory.getLog(BlacklistUpdateTask.class);
+
+
+ /**
+ * Task init.
+ */
+ public void init(Roller roller, String realPath) throws RollerException {
+ mLogger.debug("initing");
}
- public void init(Roller roller, String realPath) throws RollerException
- {
- // load Blacklist from file
- String uploadDir = RollerFactory.getRoller().getFileManager().getUploadDir();
- Blacklist.getBlacklist(null, uploadDir);
- // now have it check for an update
+
+
+ /**
+ * Execute the task.
+ */
+ public void run() {
+
+ mLogger.info("task started");
+
Blacklist.checkForUpdate();
+
+ mLogger.info("task completed");
+ }
+
+
+ /**
+ * Main method so that this task may be run from outside the webapp.
+ */
+ public static void main(String[] args) throws Exception {
+
+ // NOTE: if this task is run externally from the Roller webapp then
+ // all it will really be doing is downloading the MT blacklist file
+ BlacklistUpdateTask task = new BlacklistUpdateTask();
+ task.init(null, null);
+ task.run();
}
+
}
Modified: incubator/roller/branches/roller_2.0/src/org/roller/util/Blacklist.java
URL: http://svn.apache.org/viewcvs/incubator/roller/branches/roller_2.0/src/org/roller/util/Blacklist.java?rev=291379&r1=291378&r2=291379&view=diff
==============================================================================
--- incubator/roller/branches/roller_2.0/src/org/roller/util/Blacklist.java (original)
+++ incubator/roller/branches/roller_2.0/src/org/roller/util/Blacklist.java Sun Sep 25 00:07:20 2005
@@ -3,10 +3,8 @@
*/
package org.roller.util;
-import org.roller.util.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileWriter;
@@ -14,9 +12,8 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.File;
-import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
@@ -26,197 +23,241 @@
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.roller.config.RollerConfig;
+
/**
- * Based on the list provided by Jay Allen for
- * MT-Blacklist:
- * http://www.jayallen.org/projects/mt-blacklist/
- *
+ * Based on the list provided by Jay Allen for MT-Blacklist:
+ * http://www.jayallen.org/projects/mt-blacklist/
+ *
* Will provide response whether submitted string
* contains an item listed in the supplied blacklist.
* This implementation does not do everything
* MT-Blacklist does, such as the "Search & De-spam mode".
- *
+ *
* @author lance
+ * @author Allen Gilliland
*/
-public class Blacklist
-{
+public class Blacklist {
+
private static Log mLogger = LogFactory.getLog(Blacklist.class);
-
- private static Blacklist blacklist;
- public static final String blacklistFile = "blacklist.txt";
- private static final String blacklistURL = "http://www.jayallen.org/comment_spam/blacklist.txt";
+ private static final String blacklistFile = "blacklist.txt";
+ private static final String blacklistURL =
+ "http://www.jayallen.org/comment_spam/blacklist.txt";
private static final String lastUpdateStr = "Last update:";
- // Default location of blacklist file (relative to realPath) in case that uploadDir is null or empty
- // and realPath is non-null.
- private static final String DEFAULT_BLACKLIST_DIR = "resources";
- private String realPath;
- private String uploadDir;
-
+ private static Blacklist blacklist;
+
+ // Non-Static attributes
+ private String uploadDir = null;
+ private Date lastModified = null;
private List blacklistStr = new LinkedList();
private List blacklistRegex = new LinkedList();
- private Date ifModifiedSince = null;
+
+ // setup our singleton at class loading time
+ static {
+ mLogger.info("Initializing MT Blacklist");
+
+ blacklist = new Blacklist();
+ blacklist.loadBlacklistFromFile();
+ }
+
+ /**
+ * Hide constructor
+ */
+ private Blacklist() {
+ this.uploadDir = RollerConfig.getProperty("uploads.dir");
+ }
+
+
/**
* Singleton factory method.
*/
- public static Blacklist getBlacklist(String realPath, String uploadDir)
- {
- if (blacklist == null)
- {
- Blacklist temp = new Blacklist(realPath, uploadDir);
- temp.extractFromFile();
- blacklist = temp;
- }
+ public static Blacklist getBlacklist() {
return blacklist;
}
+
/**
- * This will try to download a new set of Blacklist
- * rules. If no change has occurred then return
- * current Blacklist.
- *
- * @return New Blacklist if rules have changed,
- * otherwise return current Blacklist.
- */
- public static void checkForUpdate()
- {
- blacklist = blacklist.extractFromURL();
+ * Update the MT blacklist if necessary.
+ */
+ public static void checkForUpdate() {
+ getBlacklist().update();
}
-
+
+
/**
- * Hide constructor
+ * Non-Static update method.
*/
- private Blacklist(String realPath, String uploadDir)
- {
- this.realPath = realPath;
- this.uploadDir = uploadDir;
+ public void update() {
+ boolean blacklist_updated = this.downloadBlacklist();
+
+ if(blacklist_updated)
+ this.loadBlacklistFromFile();
}
+
/**
- * Read a local file for Blacklist rules.
+ * Download the MT blacklist from the web to our uploads directory.
*/
- private void extractFromFile()
- {
- InputStream txtStream = getFileInputStream();
- if (txtStream != null)
- {
+    private boolean downloadBlacklist() {
+        // returns true only when a newer blacklist was written to disk; failures are logged and yield false
+        boolean blacklist_updated = false;
+        try {
+            mLogger.debug("attempting to download MT blacklist");
+
+            URL url = new URL(blacklistURL);
+            HttpURLConnection connection =
+                    (HttpURLConnection) url.openConnection();
+
+            // after spending way too much time debugging i've discovered
+            // that the blacklist server is selective based on the User-Agent
+            // header. without this header set i always get a 403 response :(
+            connection.setRequestProperty("User-Agent", "Mozilla/5.0");
+
+            if (this.lastModified != null) {
+                connection.setRequestProperty("If-Modified-Since",
+                        DateUtil.formatRfc822(this.lastModified));
+            }
+
+            int responseCode = connection.getResponseCode();
+
+            mLogger.debug("HttpConnection response = "+responseCode);
+
+            // did the connection return NotModified? If so, no need to parse
+            if (responseCode == HttpURLConnection.HTTP_NOT_MODIFIED) {
+                mLogger.debug("MT blacklist site says we are current");
+                return false;
+            }
+
+            // did the connection return a LastModified header?
+            long lastModifiedLong =
+                    connection.getHeaderFieldDate("Last-Modified", -1);
+
+            // if the file is newer than our current then we need to update it
+            if (responseCode == HttpURLConnection.HTTP_OK &&
+                    (this.lastModified == null ||
+                    this.lastModified.getTime() < lastModifiedLong)) {
+                // lastModified may legitimately be null on this branch, so guard the debug output
+                mLogger.debug("my last modified = "+(this.lastModified == null ? "none" : ""+this.lastModified.getTime()));
+                mLogger.debug("MT last modified = "+lastModifiedLong);
+
+                // save the new blacklist; both streams are released even if the copy fails
+                String path = this.uploadDir + File.separator + blacklistFile;
+                mLogger.debug("writing updated MT blacklist to "+path);
+                InputStream instream = connection.getInputStream();
+                try {
+                    FileOutputStream outstream = new FileOutputStream(path);
+                    try {
+                        // read from url and write to file
+                        byte[] buf = new byte[4096];
+                        int length = 0;
+                        while((length = instream.read(buf)) > 0)
+                            outstream.write(buf, 0, length);
+                    } finally {
+                        outstream.close();
+                    }
+                } finally {
+                    instream.close();
+                }
+                blacklist_updated = true;
+                mLogger.debug("MT blacklist download completed.");
+
+            } else {
+                mLogger.debug("blacklist *NOT* saved, assuming we are current");
+            }
+
+        } catch (Exception e) {
+            mLogger.error("error downloading blacklist", e);
+        }
+
+        return blacklist_updated;
+    }
+
+
+ /**
+ * Load the MT blacklist from the file system.
+ *
+ * We look for a previously downloaded version of the blacklist first and
+ * if it's not found then we load the default blacklist packed with Roller.
+ */
+ private void loadBlacklistFromFile() {
+
+ InputStream txtStream = null;
+ try {
+ String path = this.uploadDir + File.separator + blacklistFile;
+ File blacklistFile = new File(path);
+
+ // check our lastModified date to see if we need to re-read the file
+ if(this.lastModified != null &&
+ this.lastModified.getTime() >= blacklistFile.lastModified()) {
+
+ mLogger.debug("Blacklist is current, no need to load again");
+ return;
+ } else {
+ this.lastModified = new Date(blacklistFile.lastModified());
+ }
+
+ txtStream = new FileInputStream(blacklistFile);
+
+ mLogger.debug("Loading blacklist from "+path);
+ } catch (Exception e) {
+ // Roller keeps a copy in the webapp just in case
+ txtStream = getClass().getResourceAsStream("/"+blacklistFile);
+
+ mLogger.debug("Couldn't find downloaded blacklist, "+
+ "loading from classpath instead");
+ }
+
+ if (txtStream != null) {
readFromStream(txtStream, false);
+ } else {
+ mLogger.error("couldn't load a blacklist file from anywhere, "+
+ "this means blacklist checking is disabled for now.");
}
- else
- {
- throw new NullPointerException("Unable to load blacklist.txt. " +
- "Make sure blacklist.txt is in classpath.");
- }
}
-
+
+
/**
* Read in the InputStream for rules.
* @param txtStream
*/
- private String readFromStream(InputStream txtStream, boolean saveStream)
- {
+ private String readFromStream(InputStream txtStream, boolean saveStream) {
String line;
StringBuffer buf = new StringBuffer();
BufferedReader in = null;
- try
- {
- in = new BufferedReader(
- new InputStreamReader( txtStream, "UTF-8" ) );
- while ((line = in.readLine()) != null)
- {
- if (line.startsWith("#"))
- {
+ try {
+ in = new BufferedReader(
+ new InputStreamReader( txtStream, "UTF-8" ) );
+ while ((line = in.readLine()) != null) {
+ if (line.startsWith("#")) {
readComment(line);
- }
- else
- {
+ } else {
readRule(line);
}
if (saveStream) buf.append(line).append("\n");
}
- }
- catch (Exception e)
- {
+ } catch (Exception e) {
mLogger.error(e);
- }
- finally
- {
- try
- {
- if (in != null) in.close();
- }
- catch (IOException e1)
- {
+ } finally {
+ try {
+ if (in != null) in.close();
+ } catch (IOException e1) {
mLogger.error(e1);
}
}
return buf.toString();
}
- /**
- * Connect to the web for blacklist. Check to
- * see if a newer version exists before parsing.
- */
- private Blacklist extractFromURL()
- {
- // now see if we can update it from the web
- Blacklist oldBlacklist = getBlacklist(realPath, uploadDir);
- Blacklist newBlacklist = new Blacklist(realPath, uploadDir);
- try
- {
- URL url = new URL(blacklistURL);
- HttpURLConnection connection = (HttpURLConnection)url.openConnection();
- if (oldBlacklist.ifModifiedSince != null)
- {
- connection.setRequestProperty("If-Modified-Since",
- DateUtil.formatRfc822(oldBlacklist.ifModifiedSince));
- }
-
- // did the connection return NotModified? If so, no need to parse
- if ( connection.getResponseCode() == HttpURLConnection.HTTP_NOT_MODIFIED)
- {
- // we already have a current blacklist
- return oldBlacklist;
- }
-
- // did the connection return a LastModified header?
- long lastModifiedLong = connection.getHeaderFieldDate("Last-Modified", -1);
-
- // if no ifModifiedSince, or lastModifiedLong is newer, then read stream
- if (oldBlacklist.ifModifiedSince == null ||
- oldBlacklist.ifModifiedSince.getTime() < lastModifiedLong)
- {
- String results = newBlacklist.readFromStream( connection.getInputStream(), true );
-
- // save the new blacklist
- newBlacklist.writeToFile(results);
-
- if (newBlacklist.ifModifiedSince == null && lastModifiedLong != -1)
- {
- newBlacklist.ifModifiedSince = new Date(lastModifiedLong);
- }
-
- return newBlacklist;
- }
- }
- catch (Exception e)
- {
- // Catch all exceptions and just log at INFO (should this be WARN?) without a full stacktrace.
- mLogger.info("Roller Blacklist Update: Unable to update comment spam blacklist due to exception: " + e);
- }
- return oldBlacklist;
- }
-
+
/**
* @param str
*/
- private void readRule(String str)
- {
+ private void readRule(String str) {
if (StringUtils.isEmpty(str)) return; // bad condition
String rule = str.trim();
@@ -231,49 +272,43 @@
{
// pre-compile patterns since they will be frequently used
blacklistRegex.add(Pattern.compile(rule));
- }
- else if (StringUtils.isNotEmpty(rule))
- {
+ } else if (StringUtils.isNotEmpty(rule)) {
blacklistStr.add(rule);
}
}
-
+
+
/**
* Try to parse out "Last update" value: 2004/03/08 23:17:30.
* @param str
*/
- private void readComment(String str)
- {
+ private void readComment(String str) {
int lastUpdatePos = str.indexOf(lastUpdateStr);
- if (lastUpdatePos > -1)
- {
+ if (lastUpdatePos > -1) {
str = str.substring(lastUpdatePos + lastUpdateStr.length());
str = str.trim();
- try
- {
+ try {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
- ifModifiedSince = DateUtil.parse(str, sdf);
- }
- catch (ParseException e)
- {
+ lastModified = DateUtil.parse(str, sdf);
+ } catch (ParseException e) {
mLogger.debug("ParseException reading " + str);
}
}
}
-
+
+
/**
* Does the String argument match any of the rules in the blacklist?
- *
+ *
* @param str
* @return
*/
- public boolean isBlacklisted(String str)
- {
+ public boolean isBlacklisted(String str) {
if (str == null || StringUtils.isEmpty(str)) return false;
// First iterate over blacklist, doing indexOf.
// Then iterate over blacklistRegex and test.
- // As soon as there is a hit in either case return true
+ // As soon as there is a hit in either case return true
// test plain String.indexOf
if( testStringRules(str) ) return true;
@@ -282,37 +317,31 @@
return testRegExRules(str);
}
+
/**
* Test String against the RegularExpression rules.
- *
+ *
* @param str
* @return
*/
- private boolean testRegExRules(String str)
- {
+ private boolean testRegExRules(String str) {
boolean hit = false;
Pattern testPattern = null;
Iterator iter = blacklistRegex.iterator();
- while (iter.hasNext())
- {
+ while (iter.hasNext()) {
testPattern = (Pattern)iter.next();
// want to see what it is matching on
// if we are in "debug mode"
- if (mLogger.isDebugEnabled())
- {
+ if (mLogger.isDebugEnabled()) {
Matcher matcher = testPattern.matcher(str);
- if (matcher.find())
- {
+ if (matcher.find()) {
mLogger.debug(matcher.group() + " matched by " + testPattern.pattern());
hit = true;
break;
}
- }
- else
- {
- if (testPattern.matcher(str).find())
- {
+ } else {
+ if (testPattern.matcher(str).find()) {
hit = true;
break;
}
@@ -320,28 +349,25 @@
}
return hit;
}
-
+
+
/**
* Test the String against the String rules,
* using simple indexOf.
- *
+ *
* @param str
* @return
*/
- private boolean testStringRules(String str)
- {
+ private boolean testStringRules(String str) {
String test;
Iterator iter = blacklistStr.iterator();
boolean hit = false;
- while (iter.hasNext())
- {
+ while (iter.hasNext()) {
test = (String)iter.next();
//System.out.println("check against |" + test + "|");
- if (str.indexOf(test) > -1)
- {
+ if (str.indexOf(test) > -1) {
// want to see what it is matching on
- if (mLogger.isDebugEnabled())
- {
+ if (mLogger.isDebugEnabled()) {
mLogger.debug("matched:" + test + ":");
}
hit = true;
@@ -351,87 +377,11 @@
return hit;
}
- /**
- * Try reading blacklist.txt from wherever RollerConfig.getUploadDir()
- * is, otherwise try loading it from web resource (/WEB-INF/).
- */
- private InputStream getFileInputStream()
- {
- try
- {
- // TODO: clean up
- // This was previously throwing an NPE to get to the exception case
- // when being called in several places with indexDir==null.
- // This is just about as bad; it needs to be cleaned up.
- String path = getBlacklistFilePath();
- if (path == null)
- {
- throw new FileNotFoundException(
- "null path (indexDir and realPath both null)");
- }
- return new FileInputStream( path );
- }
- catch (Exception e)
- {
- return getClass().getResourceAsStream("/"+blacklistFile);
- }
- }
-
- /**
- * @param results
- */
- private void writeToFile(String results)
- {
- FileWriter out = null;
- String path = getBlacklistFilePath();
- if (path == null)
- {
- mLogger.debug("Not writing blacklist file since directory paths were null.");
- return;
- }
- try
- {
- // attempt writing results
- out = new FileWriter(path);
- out.write( results.toCharArray() );
- }
- catch (Exception e)
- {
- mLogger.info("Unable to write new " + path);
- }
- finally
- {
- try
- {
- if (out != null) out.close();
- }
- catch (IOException e)
- {
- mLogger.error("Unable to close stream to " + path);
- }
- }
- }
-
- // Added for ROL-612 - TODO: Consider refactoring - nearly duplicate code in FileManagerImpl.
- private String getBlacklistFilePath()
- {
- if (uploadDir == null && realPath==null)
- {
- // to preserve existing behavior forced to interpret this differently
- return null;
- }
- if (uploadDir == null || uploadDir.trim().length() == 0)
- {
- uploadDir = realPath + File.separator + DEFAULT_BLACKLIST_DIR;
- }
- return uploadDir + File.separator + blacklistFile;
- }
-
+
/**
* Return pretty list of String and RegEx rules.
*/
- public String toString()
- {
+ public String toString() {
StringBuffer buf = new StringBuffer("blacklist ");
buf.append(blacklistStr).append("\n");
buf.append("Regex blacklist ").append(blacklistRegex);
Modified: incubator/roller/branches/roller_2.0/src/org/roller/util/CommentSpamChecker.java
URL: http://svn.apache.org/viewcvs/incubator/roller/branches/roller_2.0/src/org/roller/util/CommentSpamChecker.java?rev=291379&r1=291378&r2=291379&view=diff
==============================================================================
--- incubator/roller/branches/roller_2.0/src/org/roller/util/CommentSpamChecker.java (original)
+++ incubator/roller/branches/roller_2.0/src/org/roller/util/CommentSpamChecker.java Sun Sep 25 00:07:20 2005
@@ -14,7 +14,7 @@
public class CommentSpamChecker
{
private static Log mLogger = LogFactory.getLog(CommentSpamChecker.class);
- private Blacklist blacklist = Blacklist.getBlacklist(null,null);
+ private Blacklist blacklist = Blacklist.getBlacklist();
// -----------------------------------------------------------------------
/**
Modified: incubator/roller/branches/roller_2.0/tests/org/roller/BlacklistTest.java
URL: http://svn.apache.org/viewcvs/incubator/roller/branches/roller_2.0/tests/org/roller/BlacklistTest.java?rev=291379&r1=291378&r2=291379&view=diff
==============================================================================
--- incubator/roller/branches/roller_2.0/tests/org/roller/BlacklistTest.java (original)
+++ incubator/roller/branches/roller_2.0/tests/org/roller/BlacklistTest.java Sun Sep 25 00:07:20 2005
@@ -39,7 +39,7 @@
protected void setUp() throws Exception
{
super.setUp();
- blacklist = Blacklist.getBlacklist(null,null);
+ blacklist = Blacklist.getBlacklist();
}
/**