You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@roller.apache.org by ga...@apache.org on 2005/12/18 18:47:19 UTC
svn commit: r357482 - in /incubator/roller/trunk:
src/org/roller/presentation/filters/RefererFilter.java
web/WEB-INF/classes/roller.properties
Author: gangolli
Date: Sun Dec 18 09:47:16 2005
New Revision: 357482
URL: http://svn.apache.org/viewcvs?rev=357482&view=rev
Log:
For ROL-934. Support robot user-agent check in RefererFilter. The property to configure this has been added to roller.properties with the setting commented out. To enable, uncomment it and edit the pattern if/as desired.
Modified:
incubator/roller/trunk/src/org/roller/presentation/filters/RefererFilter.java
incubator/roller/trunk/web/WEB-INF/classes/roller.properties
Modified: incubator/roller/trunk/src/org/roller/presentation/filters/RefererFilter.java
URL: http://svn.apache.org/viewcvs/incubator/roller/trunk/src/org/roller/presentation/filters/RefererFilter.java?rev=357482&r1=357481&r2=357482&view=diff
==============================================================================
--- incubator/roller/trunk/src/org/roller/presentation/filters/RefererFilter.java (original)
+++ incubator/roller/trunk/src/org/roller/presentation/filters/RefererFilter.java Sun Dec 18 09:47:16 2005
@@ -1,6 +1,8 @@
package org.roller.presentation.filters;
import java.io.IOException;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
@@ -17,95 +19,107 @@
import org.roller.model.RollerFactory;
import org.roller.presentation.RollerContext;
import org.roller.presentation.RollerRequest;
+import org.roller.config.RollerConfig;
/**
* Keep track of referers.
*
- * @web.filter name="RefererFilter"
- *
* @author David M. Johnson
+ * @web.filter name="RefererFilter"
*/
public class RefererFilter implements Filter {
- private FilterConfig mFilterConfig = null;
- private static Log mLogger =
- LogFactory.getFactory().getInstance(RefererFilter.class);
-
+ private FilterConfig mFilterConfig = null;
+ private static Log mLogger = LogFactory.getFactory().getInstance(RefererFilter.class);
+ private static Pattern robotPattern = null;
+ private static final String ROBOT_PATTERN_PROP_NAME = "referrer.robotCheck.userAgentPattern";
+
/**
* destroy
*/
public void destroy() {
}
-
+
/**
* doFilter
*/
- public void doFilter(
- ServletRequest req, ServletResponse res, FilterChain chain)
- throws IOException, ServletException {
- HttpServletRequest request = (HttpServletRequest)req;
+ public void doFilter(ServletRequest req, ServletResponse res, FilterChain chain) throws IOException, ServletException {
+ HttpServletRequest request = (HttpServletRequest) req;
boolean isRefSpammer = false;
+ boolean isRobot = false;
+
try {
- RollerRequest rreq = RollerRequest.getRollerRequest(request);
- RollerContext rctx = RollerContext.getRollerContext(
- mFilterConfig.getServletContext());
-
- if (rreq!=null && rreq.getWebsite() != null) {
- String handle = rreq.getWebsite().getHandle();
-
- // Base page URLs, with and without www.
- String basePageUrlWWW =
- rctx.getAbsoluteContextUrl(request)+"/page/"+handle;
- String basePageUrl = basePageUrlWWW;
- if ( basePageUrlWWW.startsWith("http://www.") ) {
- // chop off the http://www.
- basePageUrl = "http://"+basePageUrlWWW.substring(11);
- }
-
- // Base comment URLs, with and without www.
- String baseCommentsUrlWWW =
- rctx.getAbsoluteContextUrl(request)+"/comments/"+handle;
- String baseCommentsUrl = baseCommentsUrlWWW;
- if ( baseCommentsUrlWWW.startsWith("http://www.") ) {
- // chop off the http://www.
- baseCommentsUrl= "http://"+baseCommentsUrlWWW.substring(11);
- }
-
- // Don't process hits from same user's blogs as referers by
- // ignoring Don't process referer from pages that start with base URLs.
- String referer = request.getHeader("Referer");
- if ( referer==null ||
- (
- !referer.startsWith( basePageUrl )
- && !referer.startsWith( basePageUrlWWW )
- && !referer.startsWith( baseCommentsUrl )
- && !referer.startsWith( baseCommentsUrlWWW )
- )
- ) {
- RefererManager refMgr =
- RollerFactory.getRoller().getRefererManager();
- isRefSpammer = refMgr.processRequest(rreq);
- } else {
- if (mLogger.isDebugEnabled()) {
- mLogger.debug("Ignoring referer="+referer);
+ if (robotPattern != null) {
+ // If the pattern is present, we check for whether the User-Agent matches,
+ // and set isRobot if so. Currently, all referral processing, including
+ // spam check, is skipped for robots identified in this way.
+ String userAgent = request.getHeader("User-Agent");
+ isRobot = (userAgent != null && userAgent.length() > 0 && robotPattern.matcher(userAgent).matches());
+ }
+
+ if (!isRobot) {
+ RollerRequest rreq = RollerRequest.getRollerRequest(request);
+ RollerContext rctx = RollerContext.getRollerContext(mFilterConfig.getServletContext());
+
+ if (rreq != null && rreq.getWebsite() != null) {
+ String handle = rreq.getWebsite().getHandle();
+
+ // Base page URLs, with and without www.
+ String basePageUrlWWW = rctx.getAbsoluteContextUrl(request) + "/page/" + handle;
+ String basePageUrl = basePageUrlWWW;
+ if (basePageUrlWWW.startsWith("http://www.")) {
+ // chop off the http://www.
+ basePageUrl = "http://" + basePageUrlWWW.substring(11);
+ }
+
+ // Base comment URLs, with and without www.
+ String baseCommentsUrlWWW = rctx.getAbsoluteContextUrl(request) + "/comments/" + handle;
+ String baseCommentsUrl = baseCommentsUrlWWW;
+ if (baseCommentsUrlWWW.startsWith("http://www.")) {
+ // chop off the http://www.
+ baseCommentsUrl = "http://" + baseCommentsUrlWWW.substring(11);
+ }
+
+ // Don't count hits from the same user's own blog as referers: ignore any
+ // referer that starts with one of the blog's base page or comment URLs.
+ String referer = request.getHeader("Referer");
+ if (referer == null || (!referer.startsWith(basePageUrl) && !referer.startsWith(basePageUrlWWW) && !referer.startsWith(baseCommentsUrl) && !referer.startsWith(baseCommentsUrlWWW)))
+ {
+ RefererManager refMgr = RollerFactory.getRoller().getRefererManager();
+ isRefSpammer = refMgr.processRequest(rreq);
+ } else {
+ if (mLogger.isDebugEnabled()) {
+ mLogger.debug("Ignoring referer=" + referer);
+ }
}
}
}
} catch (Exception e) {
- mLogger.error("Processing referer",e);
+ mLogger.error("Processing referer", e);
}
-
+
if (isRefSpammer) {
- HttpServletResponse response = (HttpServletResponse)res;
+ HttpServletResponse response = (HttpServletResponse) res;
response.sendError(HttpServletResponse.SC_FORBIDDEN);
} else {
chain.doFilter(req, res);
}
}
-
+
/**
* init
*/
public void init(FilterConfig filterConfig) throws ServletException {
mFilterConfig = filterConfig;
+ String robotPatternStr = RollerConfig.getProperty(ROBOT_PATTERN_PROP_NAME);
+ if (robotPatternStr != null && robotPatternStr.length() >0) {
+ // Parse the pattern, and store the compiled form.
+ try {
+ robotPattern = Pattern.compile(robotPatternStr);
+ } catch (Exception e) {
+ // Most likely a PatternSyntaxException; log and continue as if it is not set.
+ mLogger.error("Error parsing "+ ROBOT_PATTERN_PROP_NAME + " value '" +
+ robotPatternStr + "'. Robots will not be filtered. ", e);
+ }
+ }
}
}
Modified: incubator/roller/trunk/web/WEB-INF/classes/roller.properties
URL: http://svn.apache.org/viewcvs/incubator/roller/trunk/web/WEB-INF/classes/roller.properties?rev=357482&r1=357481&r2=357482&view=diff
==============================================================================
--- incubator/roller/trunk/web/WEB-INF/classes/roller.properties (original)
+++ incubator/roller/trunk/web/WEB-INF/classes/roller.properties Sun Dec 18 09:47:16 2005
@@ -207,6 +207,14 @@
# trackback.allowedURLs=http://w3.ibm.com/.*||http://another.example.com/.*
trackback.allowedURLs=
+#Robot check in referral processing. If this pattern is set and the User-Agent in the
+#request matches this pattern, all referral processing is skipped; this means that
+#the referral spam check is skipped, the request is allowed to proceed, but the
+#referrer is not recorded and hit count is not incremented. Recommended for large sites
+#that get a lot of legitimate crawler bot traffic. The pattern here is a suggestion that
+#has been reported to work well.
+#referrer.robotCheck.userAgentPattern=.*(slurp|bot|java).*
+
#----------------------------------
# ping settings