You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by si...@apache.org on 2008/11/18 15:22:46 UTC

svn commit: r718603 - in /labs/magma/trunk/foundation-validation: ./ src/main/java/org/apache/magma/validation/validators/ src/test/java/org/apache/magma/validation/validators/

Author: simoneg
Date: Tue Nov 18 06:22:46 2008
New Revision: 718603

URL: http://svn.apache.org/viewvc?rev=718603&view=rev
Log:
LABS-212 : preliminary HTML validator

Added:
    labs/magma/trunk/foundation-validation/src/main/java/org/apache/magma/validation/validators/RichTextVisitor.java
    labs/magma/trunk/foundation-validation/src/test/java/org/apache/magma/validation/validators/RichTextVisitorTest.java
Modified:
    labs/magma/trunk/foundation-validation/pom.xml
    labs/magma/trunk/foundation-validation/src/main/java/org/apache/magma/validation/validators/RichTextValidator.java

Modified: labs/magma/trunk/foundation-validation/pom.xml
URL: http://svn.apache.org/viewvc/labs/magma/trunk/foundation-validation/pom.xml?rev=718603&r1=718602&r2=718603&view=diff
==============================================================================
--- labs/magma/trunk/foundation-validation/pom.xml (original)
+++ labs/magma/trunk/foundation-validation/pom.xml Tue Nov 18 06:22:46 2008
@@ -14,10 +14,7 @@
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
--->
-<project
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"
-	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+--><project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
 	<modelVersion>4.0.0</modelVersion>
 	<parent>
 	  <groupId>org.apache.magma</groupId>
@@ -50,6 +47,11 @@
 			<artifactId>foundation-basics</artifactId>
 			<version>0.0.1-SNAPSHOT</version>
 		</dependency>
-	</dependencies>
+	  <dependency>
+      <groupId>org.htmlparser</groupId>
+      <artifactId>htmlparser</artifactId>
+      <version>1.6</version>
+    </dependency>
+  </dependencies>
 
-</project>
+</project>
\ No newline at end of file

Modified: labs/magma/trunk/foundation-validation/src/main/java/org/apache/magma/validation/validators/RichTextValidator.java
URL: http://svn.apache.org/viewvc/labs/magma/trunk/foundation-validation/src/main/java/org/apache/magma/validation/validators/RichTextValidator.java?rev=718603&r1=718602&r2=718603&view=diff
==============================================================================
--- labs/magma/trunk/foundation-validation/src/main/java/org/apache/magma/validation/validators/RichTextValidator.java (original)
+++ labs/magma/trunk/foundation-validation/src/main/java/org/apache/magma/validation/validators/RichTextValidator.java Tue Nov 18 06:22:46 2008
@@ -3,7 +3,10 @@
 import java.util.List;
 
 import org.apache.magma.basics.LocalizableString;
+import org.apache.magma.beans.PropertyInfo;
 import org.apache.magma.validation.Validator;
+import org.htmlparser.Parser;
+import org.htmlparser.visitors.NodeVisitor;
 
 public class RichTextValidator implements Validator<String> {
 
@@ -24,9 +27,11 @@
 	private boolean permitJustify;
 	
 	
-	public List<LocalizableString> validate(String value) {
-		// TODO Auto-generated method stub
-		return null;
+	public List<LocalizableString> validate(Object bean, PropertyInfo property, String value) {
+		if (value == null || value.length() == 0) return null;
+		RichTextVisitor visit = new RichTextVisitor(this);
+		visit.visit(value);
+		return visit.getMessages();
 	}
 
 

Added: labs/magma/trunk/foundation-validation/src/main/java/org/apache/magma/validation/validators/RichTextVisitor.java
URL: http://svn.apache.org/viewvc/labs/magma/trunk/foundation-validation/src/main/java/org/apache/magma/validation/validators/RichTextVisitor.java?rev=718603&view=auto
==============================================================================
--- labs/magma/trunk/foundation-validation/src/main/java/org/apache/magma/validation/validators/RichTextVisitor.java (added)
+++ labs/magma/trunk/foundation-validation/src/main/java/org/apache/magma/validation/validators/RichTextVisitor.java Tue Nov 18 06:22:46 2008
@@ -0,0 +1,152 @@
+package org.apache.magma.validation.validators;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.magma.basics.LocalizableString;
+import org.apache.magma.basics.MagmaException;
+import org.htmlparser.Node;
+import org.htmlparser.Parser;
+import org.htmlparser.Remark;
+import org.htmlparser.Tag;
+import org.htmlparser.Text;
+import org.htmlparser.util.NodeIterator;
+import org.htmlparser.util.NodeList;
+import org.htmlparser.util.ParserException;
+import org.htmlparser.visitors.NodeVisitor;
+
+/**
+ * Walks an HTML fragment parsed with htmlparser, recording a message for every
+ * tag the {@link RichTextValidator} does not permit and, when a {@link Writer}
+ * was supplied, echoing the permitted markup to it.
+ */
+public class RichTextVisitor {
+
+	private final RichTextValidator validator;
+	private final List<LocalizableString> messages = new ArrayList<LocalizableString>();
+	/** Destination of the filtered markup; {@code null} means validate only. */
+	private final Writer output;
+
+	/** Creates a visitor that only collects messages and writes no output. */
+	public RichTextVisitor(RichTextValidator validator) {
+		this(validator, null);
+	}
+
+	/** Creates a visitor that also writes the permitted markup to {@code out}. */
+	public RichTextVisitor(RichTextValidator validator, Writer out) {
+		this.validator = validator;
+		this.output = out;
+	}
+
+	/** Returns the live list of messages collected by the visits made so far. */
+	public List<LocalizableString> getMessages() {
+		return this.messages;
+	}
+
+	/**
+	 * Parses {@code content} as HTML and visits every top level node.
+	 * @throws MagmaException wrapping any failure raised while parsing or writing
+	 */
+	public void visit(CharSequence content) {
+		try {
+			Parser p = new Parser();
+			p.setInputHTML(content.toString());
+			visitAll(p.parse(null));
+		} catch (Exception e) {
+			throw new MagmaException(e, "Error parsing html");
+		}
+	}
+
+	/**
+	 * Visits one node. Text and remarks are echoed unchanged; a tag is checked
+	 * with {@link #skipTag(Tag)}. The children of a rejected tag are still
+	 * visited, so only the tag itself and its end tag are left out of the output.
+	 */
+	public void visit(Node node) throws IOException, ParserException {
+		if (node instanceof Text) {
+			visitStringNode((Text) node);
+		} else if (node instanceof Remark) {
+			visitRemarkNode((Remark) node);
+		} else if (node instanceof Tag) {
+			Tag tag = (Tag) node;
+			boolean skip = skipTag(tag);
+			boolean echo = !skip && this.output != null;
+			if (skip) {
+				this.messages.add(new LocalizableString("Tag {0} is not permitted, line {1}", tag.getTagName(), tag.getStartingLineNumber()));
+			}
+			if (echo) {
+				this.output.append('<').append(tag.getText()).append('>');
+			}
+			visitAll(tag.getChildren());
+			if (echo && tag.getEndTag() != null) {
+				this.output.append('<').append(tag.getEndTag().getText()).append('>');
+			}
+		}
+	}
+
+	/** Visits every node of {@code nodes} in order; a {@code null} list is ignored. */
+	private void visitAll(NodeList nodes) throws IOException, ParserException {
+		if (nodes != null) {
+			for (NodeIterator it = nodes.elements(); it.hasMoreNodes();) {
+				visit(it.nextNode());
+			}
+		}
+	}
+
+	/** Copies a remark (HTML comment) node to the output, if there is one. */
+	public void visitRemarkNode(Remark remark) throws IOException {
+		if (this.output != null) {
+			this.output.append(remark.toHtml());
+		}
+	}
+
+	/** Copies a text node to the output, if there is one. */
+	public void visitStringNode(Text string) throws IOException {
+		if (this.output != null) {
+			this.output.append(string.toHtml());
+		}
+	}
+
+	/**
+	 * Tells whether {@code tag} is forbidden by the validator settings. As a side
+	 * effect, the {@code bgcolor} and {@code background} attributes are removed
+	 * from the tag when background colors are not permitted.
+	 * @return {@code true} if the tag must be reported and left out of the output
+	 */
+	public boolean skipTag(Tag tag) {
+		String tagname = tag.getTagName();
+		if (!validator.isPermitBackgroundColors()) {
+			tag.removeAttribute("bgcolor");
+			tag.removeAttribute("background");
+			// TODO background colors could be in style attribute as well
+		}
+		// TODO colors, indents and justify are not checked yet: they probably live
+		// in the style attribute, whose CSS directives still have to be inspected.
+		return (!validator.isPermitEmphasis() && isOneOf(tagname, "EM", "I"))
+			|| (!validator.isPermitHeadings() && isHeading(tagname))
+			|| (!validator.isPermitImages() && tagname.equals("IMG"))
+			|| (!validator.isPermitLinks() && tagname.equals("A"))
+			|| (!validator.isPermitLists() && isOneOf(tagname, "UL", "LI", "OL", "DL", "DT", "DD"))
+			|| (!validator.isPermitScripts() && tagname.equals("SCRIPT"))
+			|| (!validator.isPermitStrong() && isOneOf(tagname, "STRONG", "B"))
+			|| (!validator.isPermitTables() && isOneOf(tagname, "TABLE", "TH", "TD", "TR", "TBODY", "THEAD", "TFOOT"));
+	}
+
+	/** True only for H1..H6; other two letter names such as HR are not headings. */
+	private static boolean isHeading(String tagname) {
+		return tagname.length() == 2 && tagname.charAt(0) == 'H' && tagname.charAt(1) >= '1' && tagname.charAt(1) <= '6';
+	}
+
+	/** Tells whether {@code tagname} equals one of {@code candidates}. */
+	private static boolean isOneOf(String tagname, String... candidates) {
+		for (String candidate : candidates) {
+			if (tagname.equals(candidate)) {
+				return true;
+			}
+		}
+		return false;
+	}
+}

Added: labs/magma/trunk/foundation-validation/src/test/java/org/apache/magma/validation/validators/RichTextVisitorTest.java
URL: http://svn.apache.org/viewvc/labs/magma/trunk/foundation-validation/src/test/java/org/apache/magma/validation/validators/RichTextVisitorTest.java?rev=718603&view=auto
==============================================================================
--- labs/magma/trunk/foundation-validation/src/test/java/org/apache/magma/validation/validators/RichTextVisitorTest.java (added)
+++ labs/magma/trunk/foundation-validation/src/test/java/org/apache/magma/validation/validators/RichTextVisitorTest.java Tue Nov 18 06:22:46 2008
@@ -0,0 +1,52 @@
+package org.apache.magma.validation.validators;
+
+import static org.junit.Assert.*;
+import static org.hamcrest.CoreMatchers.*;
+
+import java.io.StringWriter;
+
+import org.htmlparser.Parser;
+import org.junit.Test;
+
+
+/**
+ * Exercises {@link RichTextVisitor} on well formed, malformed and forbidden markup.
+ */
+public class RichTextVisitorTest {
+
+	/** The clean document every scenario is expected to write out. */
+	private static final String CLEAN_HTML = "<html><head><title>Ciao</title></head><body><p>This is a text<br/>with a br inside.</p></body></html>";
+
+	// JUnit builds a new test instance per method, so each test gets its own pair.
+	private final StringWriter out = new StringWriter();
+	private final RichTextVisitor visitor = new RichTextVisitor(new RichTextValidator(), out);
+
+	/** Well formed HTML must be written back exactly as it was given. */
+	@Test
+	public void validTree() throws Exception {
+		visitor.visit(CLEAN_HTML);
+		out.flush();
+		assertEquals(CLEAN_HTML, out.toString());
+	}
+
+	/** The end tags missing from the input (title, p) must appear in the output. */
+	@Test
+	public void malformedHtml() throws Exception {
+		String missingEndTags = "<html><head><title>Ciao</head><body><p>This is a text<br/>with a br inside.</body></html>";
+		visitor.visit(missingEndTags);
+		out.flush();
+		assertEquals(CLEAN_HTML, out.toString());
+	}
+
+	/**
+	 * The img and a tags must be left out of the output, with one message each.
+	 */
+	@Test
+	public void forbiddenElements() throws Exception {
+		String withImageAndLink = "<html><head><title>Ciao</title></head><body><p>This is a <img src=\"ciao.gif\"/>text<br/>with a <a href=\"links.html\">br</a> inside.</p></body></html>";
+		visitor.visit(withImageAndLink);
+		out.flush();
+		assertEquals(CLEAN_HTML, out.toString());
+		assertThat(visitor.getMessages().size(), equalTo(2));
+	}
+}



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org