You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@roller.apache.org by sn...@apache.org on 2012/05/19 17:30:50 UTC

svn commit: r1340501 - in /roller/branches/roller_5.0: weblogger-business/ weblogger-business/src/main/java/org/apache/roller/weblogger/business/plugins/ weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/ weblogger-business/src/main/ja...

Author: snoopdave
Date: Sat May 19 15:30:49 2012
New Revision: 1340501

URL: http://svn.apache.org/viewvc?rev=1340501&view=rev
Log:
new startup property 'weblogAdminsUntrusted' causes weblog content, title, description and user fields to be HTML sanitized, i.e. any tags or attributes that could contain JavaScript are either omitted or cleaned up.

Added:
    roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/util/HTMLSanitizer.java
      - copied, changed from r1337602, roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/business/plugins/comment/HTMLSubsetPlugin.java
Modified:
    roller/branches/roller_5.0/weblogger-business/pom.xml
    roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/business/plugins/PluginManagerImpl.java
    roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/WeblogEntry.java
    roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/UserWrapper.java
    roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/WeblogEntryWrapper.java
    roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/WeblogWrapper.java
    roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/core/filters/LoadSaltFilter.java
    roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/core/filters/ValidateSaltFilter.java
    roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/rendering/plugins/comments/AkismetCommentValidator.java
    roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/rendering/util/cache/SaltCache.java

Modified: roller/branches/roller_5.0/weblogger-business/pom.xml
URL: http://svn.apache.org/viewvc/roller/branches/roller_5.0/weblogger-business/pom.xml?rev=1340501&r1=1340500&r2=1340501&view=diff
==============================================================================
--- roller/branches/roller_5.0/weblogger-business/pom.xml (original)
+++ roller/branches/roller_5.0/weblogger-business/pom.xml Sat May 19 15:30:49 2012
@@ -196,7 +196,12 @@
             <artifactId>derbyclient</artifactId>
             <scope>provided</scope>
         </dependency>
-
+        <dependency>
+            <groupId>commons-validator</groupId>
+            <artifactId>commons-validator</artifactId>
+            <version>1.3.1</version>
+            <type>jar</type>
+        </dependency>
     </dependencies>
 
     <build>

Modified: roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/business/plugins/PluginManagerImpl.java
URL: http://svn.apache.org/viewvc/roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/business/plugins/PluginManagerImpl.java?rev=1340501&r1=1340500&r2=1340501&view=diff
==============================================================================
--- roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/business/plugins/PluginManagerImpl.java (original)
+++ roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/business/plugins/PluginManagerImpl.java Sat May 19 15:30:49 2012
@@ -32,6 +32,7 @@ import org.apache.roller.weblogger.pojos
 import org.apache.commons.lang.StringUtils;
 import org.apache.roller.weblogger.business.plugins.comment.WeblogEntryCommentPlugin;
 import org.apache.roller.weblogger.pojos.WeblogEntryComment;
+import org.apache.roller.weblogger.util.HTMLSanitizer;
 
 
 /**
@@ -85,11 +86,12 @@ public class PluginManagerImpl implement
         return ret;
     }
     
-    
     public String applyWeblogEntryPlugins(Map pagePlugins,WeblogEntry entry, String str) {
+
         String ret = str;
         WeblogEntry copy = new WeblogEntry(entry);
         List entryPlugins = copy.getPluginsList();
+
         if (entryPlugins != null && !entryPlugins.isEmpty()) {
             Iterator iter = entryPlugins.iterator();
             while (iter.hasNext()) {
@@ -102,7 +104,8 @@ public class PluginManagerImpl implement
                 }
             }
         }
-        return ret;
+
+        return HTMLSanitizer.conditionallySanitize(ret);
     }
     
     

Modified: roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/WeblogEntry.java
URL: http://svn.apache.org/viewvc/roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/WeblogEntry.java?rev=1340501&r1=1340500&r2=1340501&view=diff
==============================================================================
--- roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/WeblogEntry.java (original)
+++ roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/WeblogEntry.java Sat May 19 15:30:49 2012
@@ -28,7 +28,6 @@ import java.util.Arrays;
 import java.util.Calendar;
 import java.util.Collections;
 import java.util.Date;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
@@ -54,6 +53,8 @@ import org.apache.roller.util.DateUtil;
 import org.apache.roller.weblogger.util.I18nMessages;
 import org.apache.roller.util.UUIDGenerator;
 import org.apache.roller.weblogger.business.UserManager;
+import org.apache.roller.weblogger.config.WebloggerConfig;
+import org.apache.roller.weblogger.util.HTMLSanitizer;
 import org.apache.roller.weblogger.util.Utilities;
 
 /**
@@ -1172,8 +1173,8 @@ public class WeblogEntry implements Seri
                     }
                 }
             }
-        }        
-        return ret;
+        } 
+        return HTMLSanitizer.conditionallySanitize(ret);
     }
     
     
@@ -1220,7 +1221,7 @@ public class WeblogEntry implements Seri
             }
         }
         
-        return displayContent;
+        return HTMLSanitizer.conditionallySanitize(displayContent);
     }
     
     

Modified: roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/UserWrapper.java
URL: http://svn.apache.org/viewvc/roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/UserWrapper.java?rev=1340501&r1=1340500&r2=1340501&view=diff
==============================================================================
--- roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/UserWrapper.java (original)
+++ roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/UserWrapper.java Sat May 19 15:30:49 2012
@@ -21,6 +21,7 @@ package org.apache.roller.weblogger.pojo
 import java.util.Date;
 import org.apache.roller.weblogger.config.WebloggerConfig;
 import org.apache.roller.weblogger.pojos.User;
+import org.apache.roller.weblogger.util.HTMLSanitizer;
 
 
 /**
@@ -59,12 +60,12 @@ public class UserWrapper {
     
     
     public String getScreenName() {
-        return this.pojo.getScreenName();
+        return HTMLSanitizer.conditionallySanitize(this.pojo.getScreenName());
     }
     
     
     public String getFullName() {
-        return this.pojo.getFullName();
+        return HTMLSanitizer.conditionallySanitize(this.pojo.getFullName());
     }
     
     

Modified: roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/WeblogEntryWrapper.java
URL: http://svn.apache.org/viewvc/roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/WeblogEntryWrapper.java?rev=1340501&r1=1340500&r2=1340501&view=diff
==============================================================================
--- roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/WeblogEntryWrapper.java (original)
+++ roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/WeblogEntryWrapper.java Sat May 19 15:30:49 2012
@@ -30,6 +30,7 @@ import org.apache.roller.weblogger.pojos
 import org.apache.roller.weblogger.pojos.WeblogEntryComment;
 import org.apache.roller.weblogger.pojos.WeblogEntryTag;
 import org.apache.roller.weblogger.pojos.WeblogReferrer;
+import org.apache.roller.weblogger.util.HTMLSanitizer;
 
 
 /**
@@ -99,12 +100,12 @@ public class WeblogEntryWrapper {
     
     
     public String getTitle() {
-        return this.pojo.getTitle();
-    }
-    
+        return HTMLSanitizer.conditionallySanitize(this.pojo.getTitle());
+	}
+
     
     public String getSummary() {
-        return this.pojo.getSummary();
+        return HTMLSanitizer.conditionallySanitize(this.pojo.getSummary());
     }
     
     /**
@@ -113,7 +114,7 @@ public class WeblogEntryWrapper {
      * Simply returns the same value that the pojo would have returned.
      */
     public String getText() {
-        return this.pojo.getText();
+        return HTMLSanitizer.conditionallySanitize(this.pojo.getText());
     }
     
     

Modified: roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/WeblogWrapper.java
URL: http://svn.apache.org/viewvc/roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/WeblogWrapper.java?rev=1340501&r1=1340500&r2=1340501&view=diff
==============================================================================
--- roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/WeblogWrapper.java (original)
+++ roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/pojos/wrapper/WeblogWrapper.java Sat May 19 15:30:49 2012
@@ -34,6 +34,7 @@ import org.apache.roller.weblogger.pojos
 import org.apache.roller.weblogger.pojos.WeblogEntry;
 import org.apache.roller.weblogger.pojos.WeblogEntryComment;
 import org.apache.roller.weblogger.pojos.WeblogReferrer;
+import org.apache.roller.weblogger.util.HTMLSanitizer;
 
 
 /**
@@ -117,7 +118,7 @@ public class WeblogWrapper {
     
     
     public String getDescription() {
-        return this.pojo.getDescription();
+        return HTMLSanitizer.conditionallySanitize(this.pojo.getDescription());
     }
     
     
@@ -297,7 +298,7 @@ public class WeblogWrapper {
     
     
     public String getAbout() {
-        return this.pojo.getAbout();
+        return HTMLSanitizer.conditionallySanitize(this.pojo.getAbout());
     }
     
     

Copied: roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/util/HTMLSanitizer.java (from r1337602, roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/business/plugins/comment/HTMLSubsetPlugin.java)
URL: http://svn.apache.org/viewvc/roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/util/HTMLSanitizer.java?p2=roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/util/HTMLSanitizer.java&p1=roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/business/plugins/comment/HTMLSubsetPlugin.java&r1=1337602&r2=1340501&rev=1340501&view=diff
==============================================================================
--- roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/business/plugins/comment/HTMLSubsetPlugin.java (original)
+++ roller/branches/roller_5.0/weblogger-business/src/main/java/org/apache/roller/weblogger/util/HTMLSanitizer.java Sat May 19 15:30:49 2012
@@ -16,60 +16,503 @@
  * directory of this distribution.
  */
 
-package org.apache.roller.weblogger.business.plugins.comment;
+/**
+Copyright (c) 2009 Open Lab, http://www.open-lab.com/
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.roller.weblogger.pojos.WeblogEntryComment;
-import org.apache.roller.weblogger.util.Utilities;
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
 
-/**
- * Transforms the given String into a subset of HTML.
- */
-public class HTMLSubsetPlugin implements WeblogEntryCommentPlugin {
-    private static final Log log = LogFactory.getLog(HTMLSubsetPlugin.class);
-    
-    
-    public HTMLSubsetPlugin() {
-        log.debug("Instantiating HTMLSubsetPlugin");
-    }
-    
-    
-    /**
-     * Unique identifier.  This should never change. 
-     */
-    public String getId() {
-        return "HTMLSubset";
-    }
-    
-    
-    public String getName() {
-        return "HTML Subset Restriction";
-    }
-    
-    
-    public String getDescription() {
-        return "Transforms the given comment body into a subset of HTML";
-    }
-    
-    
-    public String render(final WeblogEntryComment comment, String text) {
-        String output = text;
-        
-        // only do this if comment is HTML
-        if ("text/html".equals(comment.getContentType())) {
-            log.debug("ending value:\n" + output);
-            	        
-	        // escape html
-	        output = Utilities.escapeHTML(output);
-	        
-	        // just use old utilities method
-	        output = Utilities.transformToHTMLSubset(output);
-	        log.debug("starting value:\n"+text);
-        }
-                
-        return output;
-    }
-    
+package org.apache.roller.weblogger.util;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Stack;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.apache.commons.validator.UrlValidator;
+import org.apache.roller.weblogger.config.WebloggerConfig;
+
+public class HTMLSanitizer {
+	public static Boolean xssEnabled = WebloggerConfig.getBooleanProperty("weblogAdminsUntrusted", Boolean.FALSE);	
+
+	public static Pattern forbiddenTags = Pattern.compile("^(script|object|embed|link|style|form|input)$");
+	public static Pattern allowedTags = Pattern.compile("^(b|p|i|s|a|img|table|thead|tbody|tfoot|tr|th|td|dd|dl|dt|em|h1|h2|h3|h4|h5|h6|li|ul|ol|span|div|strike|strong|"
+			+ "sub|sup|pre|del|code|blockquote|strike|kbd|br|hr|area|map|object|embed|param|link|form|small|big)$");
+	private static Pattern commentPattern = Pattern.compile("<!--.*");  // <!--.........>
+	private static Pattern tagStartPattern = Pattern.compile("<(?i)(\\w+\\b)\\s*(.*)/?>$");  // <tag ....props.....>
+	private static Pattern tagClosePattern = Pattern.compile("</(?i)(\\w+\\b)\\s*>$");  // </tag .........>
+	private static Pattern standAloneTags = Pattern.compile("^(img|br|hr)$");
+	private static Pattern selfClosed = Pattern.compile("<.+/>");
+	private static Pattern attributesPattern = Pattern.compile("(\\w*)\\s*=\\s*\"([^\"]*)\"");  // prop="...."
+	private static Pattern stylePattern = Pattern.compile("([^\\s^:]+)\\s*:\\s*([^;]+);?");  // color:red;
+	private static Pattern urlStylePattern = Pattern.compile("(?i).*\\b\\s*url\\s*\\(['\"]([^)]*)['\"]\\)");  // url('....')"
+	public static Pattern forbiddenStylePattern = Pattern.compile("(?:(expression|eval|javascript))\\s*\\(");  // expression(....)"   thanks to Ben Summer
+
+	/**
+	 *  This method should be used to test input.
+	 *
+	 * @param html
+	 * @return true if the input is "valid"
+	 */
+	public static boolean isSanitized(String html) {
+		return sanitizer(html).isValid;
+	}
+
+	/**
+	 * Used to clean any HTML before it is output in an HTML page
+	 *
+	 * @param html
+	 * @return sanitized html
+	 */
+	public static String sanitize(String html) {
+		return sanitizer(html).html;
+	}
+
+	public static String conditionallySanitize(String ret) {
+		// if XSS protection is enabled (weblogAdminsUntrusted) then sanitize HTML
+		if (xssEnabled) {
+			ret = HTMLSanitizer.sanitize(ret);
+		}
+		return ret;
+	}
+
+	/**
+	 * Used to get the text, with tags removed or encoded
+	 *
+	 * @param html
+	 * @return sanitized text
+	 */
+	public static String getText(String html) {
+		return sanitizer(html).text;
+	}
+
+	/**
+	 * This is the main method of sanitizing. It will be used both for validation and cleaning
+	 *
+	 * @param html
+	 * @return a SanitizeResult object
+	 */
+	public static SanitizeResult sanitizer(String html) {
+		return sanitizer(html, allowedTags, forbiddenTags);
+	}
+
+	public static SanitizeResult sanitizer(String html, Pattern allowedTags, Pattern forbiddenTags) {
+		SanitizeResult ret = new SanitizeResult();
+		Stack<String> openTags = new Stack();
+
+
+		List<String> tokens = tokenize(html);
+
+		// -------------------   LOOP for every token --------------------------
+		for (String token : tokens) {
+			boolean isAcceptedToken = false;
+
+			Matcher startMatcher = tagStartPattern.matcher(token);
+			Matcher endMatcher = tagClosePattern.matcher(token);
+
+
+			//--------------------------------------------------------------------------------  COMMENT    <!-- ......... -->
+			if (commentPattern.matcher(token).find()) {
+				ret.val = ret.val + token + (token.endsWith("-->") ? "" : "-->");
+				ret.invalidTags.add(token + (token.endsWith("-->") ? "" : "-->"));
+				continue;
+
+
+				//--------------------------------------------------------------------------------  OPEN TAG    <tag .........>
+			} else if (startMatcher.find()) {
+
+				//tag name extraction
+				String tag = startMatcher.group(1).toLowerCase();
+
+
+				//-----------------------------------------------------  FORBIDDEN TAG   <script .........>
+				if (forbiddenTags.matcher(tag).find()) {
+					ret.invalidTags.add("<" + tag + ">");
+					continue;
+
+
+					// --------------------------------------------------  WELL KNOWN TAG
+				} else if (allowedTags.matcher(tag).find()) {
+
+
+					String cleanToken = "<" + tag;
+					String tokenBody = startMatcher.group(2);
+
+
+					//first test table consistency
+					//table tbody tfoot thead th tr td
+					if ("thead".equals(tag) || "tbody".equals(tag) || "tfoot".equals(tag) || "tr".equals(tag)) {
+						if (openTags.search("table") < 1) {
+							ret.invalidTags.add("<" + tag + ">");
+							continue;
+						}
+					} else if ("td".equals(tag) || "th".equals(tag)) {
+						if (openTags.search("tr") < 1) {
+							ret.invalidTags.add("<" + tag + ">");
+							continue;
+						}
+					}
+
+
+					// then test properties
+					Matcher attributes = attributesPattern.matcher(tokenBody);
+
+					boolean foundURL = false; // URL flag
+					while (attributes.find()) {
+
+						String attr = attributes.group(1).toLowerCase();
+						String val = attributes.group(2);
+
+						// we will accept href in case of <A>
+						if ("a".equals(tag) && "href".equals(attr)) {    // <a href="......">
+							String[] customSchemes = {"http", "https"};
+							if (new UrlValidator(customSchemes).isValid(val)) {
+								foundURL = true;
+							} else {
+								// maybe it is a mailto?
+								//  case <a href="mailto:pippo@pippo.com?subject=...."
+								if (val.toLowerCase().startsWith("mailto:") && val.indexOf("@") >= 0) {
+									String val1 = "http://www." + val.substring(val.indexOf("@") + 1);
+									if (new UrlValidator(customSchemes).isValid(val1)) {
+										foundURL = true;
+									} else {
+										ret.invalidTags.add(attr + " " + val);
+										val = "";
+									}
+								} else {
+									ret.invalidTags.add(attr + " " + val);
+									val = "";
+								}
+							}
+
+						} else if (tag.matches("img|embed") && "src".equals(attr)) { // <img src="......">
+							String[] customSchemes = {"http", "https"};
+							if (new UrlValidator(customSchemes).isValid(val)) {
+								foundURL = true;
+							} else {
+								ret.invalidTags.add(attr + " " + val);
+								val = "";
+							}
+
+						} else if ("href".equals(attr) || "src".equals(attr)) { // <tag src/href="......">   skipped
+							ret.invalidTags.add(tag + " " + attr + " " + val);
+							continue;
+
+
+						} else if (attr.matches("width|height")) { // <tag width/height="......">
+							if (!val.toLowerCase().matches("\\d+%|\\d+$")) { // test numeric values
+								ret.invalidTags.add(tag + " " + attr + " " + val);
+								continue;
+							}
+
+						} else if ("style".equals(attr)) { // <tag style="......">
+
+
+							// then test properties
+							Matcher styles = stylePattern.matcher(val);
+							String cleanStyle = "";
+
+							while (styles.find()) {
+								String styleName = styles.group(1).toLowerCase();
+								String styleValue = styles.group(2);
+
+								// suppress invalid styles values
+								if (forbiddenStylePattern.matcher(styleValue).find()) {
+									ret.invalidTags.add(tag + " " + attr + " " + styleValue);
+									continue;
+								}
+
+								// check if valid url
+								Matcher urlStyleMatcher = urlStylePattern.matcher(styleValue);
+								if (urlStyleMatcher.find()) {
+									String[] customSchemes = {"http", "https"};
+									String url = urlStyleMatcher.group(1);
+									if (!new UrlValidator(customSchemes).isValid(url)) {
+										ret.invalidTags.add(tag + " " + attr + " " + styleValue);
+										continue;
+									}
+								}
+
+								cleanStyle = cleanStyle + styleName + ":" + encode(styleValue) + ";";
+
+							}
+							val = cleanStyle;
+
+						} else if (attr.startsWith("on")) {  // skip all javascript events
+							ret.invalidTags.add(tag + " " + attr + " " + val);
+							continue;
+
+						} else {  // by default encode all properties
+							val = encode(val);
+						}
+
+						cleanToken = cleanToken + " " + attr + "=\"" + val + "\"";
+					}
+					cleanToken = cleanToken + ">";
+
+					isAcceptedToken = true;
+
+					// <a>, <img> and <embed> are rejected unless a valid URL was found
+					if (tag.matches("a|img|embed") && !foundURL) {
+						isAcceptedToken = false;
+						cleanToken = "";
+					}
+
+					token = cleanToken;
+
+
+					// push the tag if it requires closure and is accepted (otherwise it is encoded)
+					if (isAcceptedToken && !(standAloneTags.matcher(tag).find() || selfClosed.matcher(tag).find())) {
+						openTags.push(tag);
+					}
+
+					// --------------------------------------------------------------------------------  UNKNOWN TAG
+				} else {
+					ret.invalidTags.add(token);
+					ret.val = ret.val + token;
+					continue;
+
+
+				}
+
+				// --------------------------------------------------------------------------------  CLOSE TAG </tag>
+			} else if (endMatcher.find()) {
+				String tag = endMatcher.group(1).toLowerCase();
+
+				//is self closing
+				if (selfClosed.matcher(tag).find()) {
+					ret.invalidTags.add(token);
+					continue;
+				}
+				if (forbiddenTags.matcher(tag).find()) {
+					ret.invalidTags.add("/" + tag);
+					continue;
+				}
+				if (!allowedTags.matcher(tag).find()) {
+					ret.invalidTags.add(token);
+					ret.val = ret.val + token;
+					continue;
+				} else {
+
+
+					String cleanToken = "";
+
+					// check tag position in the stack
+					int pos = openTags.search(tag);
+					// if found on top ok
+					for (int i = 1; i <= pos; i++) {
+						//pop all elements before tag and close it
+						String poppedTag = openTags.pop();
+						cleanToken = cleanToken + "</" + poppedTag + ">";
+						isAcceptedToken = true;
+					}
+
+					token = cleanToken;
+				}
+
+			}
+
+			ret.val = ret.val + token;
+
+			if (isAcceptedToken) {
+				ret.html = ret.html + token;
+				//ret.text = ret.text + " ";
+			} else {
+				String sanToken = htmlEncodeApexesAndTags(token);
+				ret.html = ret.html + sanToken;
+				ret.text = ret.text + htmlEncodeApexesAndTags(removeLineFeed(token));
+			}
+
+
+		}
+
+		// must close remaining tags
+		while (openTags.size() > 0) {
+			//pop all elements before tag and close it
+			String poppedTag = openTags.pop();
+			ret.html = ret.html + "</" + poppedTag + ">";
+			ret.val = ret.val + "</" + poppedTag + ">";
+		}
+
+		//set boolean value
+		ret.isValid = ret.invalidTags.size() == 0;
+
+		return ret;
+	}
+
+	/**
+	 * Splits html tag and tag content <......>.
+	 *
+	 * @param html
+	 * @return a list of token
+	 */
+	private static List<String> tokenize(String html) {
+		ArrayList tokens = new ArrayList();
+		int pos = 0;
+		String token = "";
+		int len = html.length();
+		while (pos < len) {
+			char c = html.charAt(pos);
+
+			String ahead = html.substring(pos, pos > len - 4 ? len : pos + 4);
+
+			//a comment is starting
+			if ("<!--".equals(ahead)) {
+				//store the current token
+				if (token.length() > 0) {
+					tokens.add(token);
+				}
+
+				//clear the token
+				token = "";
+
+				// search for the end of the comment (-->)
+				int end = moveToMarkerEnd(pos, "-->", html);
+				tokens.add(html.substring(pos, end));
+				pos = end;
+
+
+				// a new "<" token is starting
+			} else if ('<' == c) {
+
+				//store the current token
+				if (token.length() > 0) {
+					tokens.add(token);
+				}
+
+				//clear the token
+				token = "";
+
+				// search for the end of <......>
+				int end = moveToMarkerEnd(pos, ">", html);
+				tokens.add(html.substring(pos, end));
+				pos = end;
+
+			} else {
+				token = token + c;
+				pos++;
+			}
+
+		}
+
+		//store the last token
+		if (token.length() > 0) {
+			tokens.add(token);
+		}
+
+		return tokens;
+	}
+
+	private static int moveToMarkerEnd(int pos, String marker, String s) {
+		int i = s.indexOf(marker, pos);
+		if (i > -1) {
+			pos = i + marker.length();
+		} else {
+			pos = s.length();
+		}
+		return pos;
+	}
+
+	/**
+	 * Contains the sanitizing results.
+	 * html is the sanitized, encoded HTML, ready to be printed. Unaccepted tags are encoded; text inside tags is always encoded.    MUST BE USED WHEN PRINTING HTML
+	 * text is the text inside valid tags. It contains invalid tags in encoded form.                                             SHOULD BE USED TO PRINT EXCERPTS
+	 * val  is the HTML source cleaned of unaccepted tags. It is not encoded.                                                    SHOULD BE USED IN SAVE ACTIONS
+	 * isValid is true when every tag is accepted without forcing encoding
+	 * invalidTags is the list of tags that were encoded or removed
+	 */
+	static class SanitizeResult {
+
+		public String html = "";
+		public String text = "";
+		public String val = "";
+		public boolean isValid = true;
+		public List<String> invalidTags = new ArrayList();
+	}
+
+	public static String encode(String s) {
+		return convertLineFeedToBR(htmlEncodeApexesAndTags(s == null ? "" : s));
+	}
+
+	public static final String htmlEncodeApexesAndTags(String source) {
+		return htmlEncodeTag(htmlEncodeApexes(source));
+	}
+
+	public static final String htmlEncodeApexes(String source) {
+		if (source != null) {
+			String result = replaceAllNoRegex(source, new String[]{"\"", "'"}, new String[]{"&quot;", "&#39;"});
+			return result;
+		} else {
+			return null;
+		}
+	}
+
+	public static final String htmlEncodeTag(String source) {
+		if (source != null) {
+			String result = replaceAllNoRegex(source, new String[]{"<", ">"}, new String[]{"&lt;", "&gt;"});
+			return result;
+		} else {
+			return null;
+		}
+	}
+
+	public static String convertLineFeedToBR(String text) {
+		if (text != null) {
+			return replaceAllNoRegex(text, new String[]{"\n", "\f", "\r"}, new String[]{"<br>", "<br>", " "});
+		} else {
+			return null;
+		}
+	}
+
+	public static String removeLineFeed(String text) {
+
+		if (text != null) {
+			return replaceAllNoRegex(text, new String[]{"\n", "\f", "\r"}, new String[]{" ", " ", " "});
+		} else {
+			return null;
+		}
+	}
+
+	public static final String replaceAllNoRegex(String source, String searches[], String replaces[]) {
+		int k;
+		String tmp = source;
+		for (k = 0; k < searches.length; k++) {
+			tmp = replaceAllNoRegex(tmp, searches[k], replaces[k]);
+		}
+		return tmp;
+	}
+
+	public static final String replaceAllNoRegex(String source, String search, String replace) {
+		StringBuffer buffer = new StringBuffer();
+		if (source != null) {
+			if (search.length() == 0) {
+				return source;
+			}
+			int oldPos, pos;
+			for (oldPos = 0, pos = source.indexOf(search, oldPos); pos != -1; oldPos = pos + search.length(), pos = source.indexOf(search, oldPos)) {
+				buffer.append(source.substring(oldPos, pos));
+				buffer.append(replace);
+			}
+			if (oldPos < source.length()) {
+				buffer.append(source.substring(oldPos));
+			}
+		}
+		return new String(buffer);
+	}
 }

Modified: roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/core/filters/LoadSaltFilter.java
URL: http://svn.apache.org/viewvc/roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/core/filters/LoadSaltFilter.java?rev=1340501&r1=1340500&r2=1340501&view=diff
==============================================================================
--- roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/core/filters/LoadSaltFilter.java (original)
+++ roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/core/filters/LoadSaltFilter.java Sat May 19 15:30:49 2012
@@ -26,7 +26,11 @@ import org.apache.commons.lang.RandomStr
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.roller.weblogger.ui.rendering.util.cache.SaltCache;
- 
+
+/**
+ * Filter generates a unique salt value for use in any HTTP form generated by 
+ * Roller. See also: ValidateSaltFilter.
+ */
 public class LoadSaltFilter implements Filter {
     private static Log log = LogFactory.getLog(LoadSaltFilter.class);
 

Modified: roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/core/filters/ValidateSaltFilter.java
URL: http://svn.apache.org/viewvc/roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/core/filters/ValidateSaltFilter.java?rev=1340501&r1=1340500&r2=1340501&view=diff
==============================================================================
--- roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/core/filters/ValidateSaltFilter.java (original)
+++ roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/core/filters/ValidateSaltFilter.java Sat May 19 15:30:49 2012
@@ -24,7 +24,12 @@ import javax.servlet.http.HttpServletReq
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.roller.weblogger.ui.rendering.util.cache.SaltCache;
- 
+
+/**
+ * Filter checks all POST requests for the presence of a valid salt value and
+ * rejects those without a salt value or with a salt value not generated by
+ * this Roller instance.
+ */
 public class ValidateSaltFilter implements Filter  {
     private static Log log = LogFactory.getLog(ValidateSaltFilter.class);
 

Modified: roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/rendering/plugins/comments/AkismetCommentValidator.java
URL: http://svn.apache.org/viewvc/roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/rendering/plugins/comments/AkismetCommentValidator.java?rev=1340501&r1=1340500&r2=1340501&view=diff
==============================================================================
--- roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/rendering/plugins/comments/AkismetCommentValidator.java (original)
+++ roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/rendering/plugins/comments/AkismetCommentValidator.java Sat May 19 15:30:49 2012
@@ -29,7 +29,6 @@ import org.apache.commons.logging.LogFac
 import org.apache.roller.weblogger.business.WebloggerFactory;
 import org.apache.roller.weblogger.config.WebloggerConfig;
 import org.apache.roller.weblogger.pojos.WeblogEntryComment;
-import org.apache.roller.weblogger.ui.rendering.util.*;
 import org.apache.roller.weblogger.util.RollerMessages;
 
 

Modified: roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/rendering/util/cache/SaltCache.java
URL: http://svn.apache.org/viewvc/roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/rendering/util/cache/SaltCache.java?rev=1340501&r1=1340500&r2=1340501&view=diff
==============================================================================
--- roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/rendering/util/cache/SaltCache.java (original)
+++ roller/branches/roller_5.0/weblogger-web/src/main/java/org/apache/roller/weblogger/ui/rendering/util/cache/SaltCache.java Sat May 19 15:30:49 2012
@@ -29,8 +29,10 @@ import org.apache.roller.weblogger.util.
 import org.apache.roller.weblogger.util.cache.ExpiringCacheEntry;
 
 /**
- * For thwarting XSRF attacks. If you're running distributed, 
- * then you must use a distributed cache, e.g. memcached
+ * Cache for XSRF salt values. This cache is part of XSRF protection wherein 
+ * each HTTP POST must be accompanied by a valid salt value, i.e. one generated 
+ * by Roller. If you're running distributed, then you must use a distributed 
+ * cache, e.g. memcached.
  */
 public class SaltCache {
     private static Log log = LogFactory.getLog(SaltCache.class);