You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@shindig.apache.org by lr...@apache.org on 2009/04/23 02:08:56 UTC
svn commit: r767733 - in /incubator/shindig/trunk/java/gadgets/src:
main/java/org/apache/shindig/gadgets/parse/
main/java/org/apache/shindig/gadgets/parse/nekohtml/
main/java/org/apache/shindig/gadgets/templates/
test/java/org/apache/shindig/gadgets/pa...
Author: lryan
Date: Thu Apr 23 00:08:55 2009
New Revision: 767733
URL: http://svn.apache.org/viewvc?rev=767733&view=rev
Log:
Initial version of HTML minification using a serializer. Contributed by Chi-Ngai Wan
Added:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoCompactSerializer.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/AbstractParserAndSerializerTest.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoCompactSerializerTest.java
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-iecond-comments-expected.html
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-iecond-comments.html
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-specialtags-expected.html
incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-specialtags.html
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/HtmlSerializer.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/AbstractTagHandler.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/DefaultTemplateProcessor.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/HtmlTagHandler.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/DefaultTemplateProcessorTest.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/RenderTagHandlerTest.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/TemplateBasedTagHandlerTest.java
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java?rev=767733&r1=767732&r2=767733&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java Thu Apr 23 00:08:55 2009
@@ -70,6 +70,9 @@
}
if (document == null) {
document = parseDomImpl(source);
+
+ HtmlSerializer.attach(document, createSerializer(), source);
+
// Ensure head tag exists
if (DomUtil.getFirstNamedChildNode(document.getDocumentElement(), "head") == null) {
// Add as first element
@@ -96,6 +99,12 @@
}
/**
+ * Creates an instance of Serializer that can serialize the document
+ * generated by this parser.
+ */
+ protected abstract HtmlSerializer createSerializer();
+
+ /**
* Parses a snippet of markup and appends the result as children to the
* provided node.
*
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/HtmlSerializer.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/HtmlSerializer.java?rev=767733&r1=767732&r2=767733&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/HtmlSerializer.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/HtmlSerializer.java Thu Apr 23 00:08:55 2009
@@ -17,8 +17,10 @@
*/
package org.apache.shindig.gadgets.parse;
+import org.cyberneko.html.HTMLEntities;
import org.w3c.dom.Document;
+import java.io.IOException;
import java.io.StringWriter;
/**
@@ -46,7 +48,7 @@
* @param serializer
* @param originalContent may be null
*/
- public static void attach(Document doc, HtmlSerializer serializer, String originalContent) {
+ static void attach(Document doc, HtmlSerializer serializer, String originalContent) {
doc.setUserData(KEY, serializer, null);
if (originalContent != null) {
doc.setUserData(ORIGINAL_LENGTH, originalContent.length(), null);
@@ -57,7 +59,7 @@
* Copy serializer from one document to another. Note this requires that
* serializers are thread safe
*/
- public static void copySerializer(Document from, Document to) {
+ static void copySerializer(Document from, Document to) {
Integer length = (Integer)from.getUserData(ORIGINAL_LENGTH);
if (length != null) to.setUserData(ORIGINAL_LENGTH, length, null);
to.setUserData(KEY, from.getUserData(KEY), null);
@@ -95,7 +97,7 @@
* @return
*/
public static String serialize(Document doc) {
- return ((HtmlSerializer)doc.getUserData(KEY)).serializeImpl(doc);
+ return ((HtmlSerializer) doc.getUserData(KEY)).serializeImpl(doc);
}
/**
@@ -105,4 +107,16 @@
*/
protected abstract String serializeImpl(Document doc);
+ public static void printEscapedText(CharSequence text, Appendable output) throws IOException {
+ for (int i = 0; i < text.length(); i++) {
+ char c = text.charAt(i);
+ String entity = HTMLEntities.get(c);
+ if (entity != null) {
+ output.append('&').append(entity).append(";");
+ } else {
+ output.append(c);
+ }
+ }
+ }
+
}
Added: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoCompactSerializer.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoCompactSerializer.java?rev=767733&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoCompactSerializer.java (added)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoCompactSerializer.java Thu Apr 23 00:08:55 2009
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.shindig.gadgets.parse.nekohtml;
+
+import com.google.common.collect.ImmutableSortedSet;
+
+import org.w3c.dom.Node;
+import org.apache.commons.lang.StringUtils;
+
+import java.io.IOException;
+
+/**
+ * Performs simple content compaction while writing HTML documents. The compaction includes:
+ * <ul>
+ * <li>Collapsing consecutive whitespaces while preserving those within style, pre and script tags
+ * <li>Removing HTML comments while preserving IE conditional comments
+ * </ul>
+ *
+ * TODO - Consider adding attribute quoting elimination, empty attribute elimination where safe,
+ * and end-tag elimination where safe.
+ */
+public class NekoCompactSerializer extends NekoSerializer {
+
+ private static final ImmutableSortedSet<String> SPECIAL_TAGS = ImmutableSortedSet
+ .orderedBy(String.CASE_INSENSITIVE_ORDER)
+ .add("style", "pre", "script", "textarea")
+ .build();
+ private static final String HTML_WHITESPACE = " \t\r\n";
+
+ @Override
+ protected void writeText(Node n, Appendable output) throws IOException {
+ if (isSpecialTag(n.getParentNode().getNodeName())) {
+ super.writeText(n, output);
+ } else {
+ collapseWhitespace(n.getTextContent(), output);
+ }
+ }
+
+ @Override
+ protected void writeComment(Node n, Appendable output) throws IOException {
+ if (isSpecialTag(n.getParentNode().getNodeName())) {
+ super.writeComment(n, output);
+ } else if (isIeConditionalComment(n)) {
+ super.writeComment(n, output);
+ }
+ }
+
+ /**
+ * See <a href="http://msdn.microsoft.com/en-us/library/ms537512(printer).aspx">MSDN</a>
+ * and <a href="http://www.quirksmode.org/css/condcom.html">PPK</a>
+ */
+ private boolean isIeConditionalComment(Node n) {
+ String comment = n.getTextContent();
+ return comment.indexOf("[if ") != -1 && comment.indexOf("[endif]") != -1;
+ }
+
+ /**
+ * Returns true if a tag with a given tagName should preserve any whitespaces
+ * in its children nodes.
+ */
+ static boolean isSpecialTag(String tagName) {
+ return SPECIAL_TAGS.contains(tagName);
+ }
+
+ /**
+ * Collapse any consecutive HTML whitespace characters inside a string into
+ * one space character (0x20). Leading and trailing whitespace is dropped, so
+ * nothing is output when the given string is entirely composed of whitespace.
+ *
+ * References:
+ * <ul>
+ * <li>http://www.w3.org/TR/html401/struct/text.html#h-9.1</li>
+ * <li>http://java.sun.com/javase/6/docs/api/java/lang/Character.html#isWhitespace(char)</li>
+ * </ul>
+ */
+ static void collapseWhitespace(String str, Appendable output) throws IOException {
+ str = StringUtils.stripStart(str, HTML_WHITESPACE);
+
+ // Whitespaces between a sequence of non-whitespace characters
+ boolean seenWhitespace = false;
+ for (int i = 0; i < str.length(); i++) {
+ char c = str.charAt(i);
+
+ if (HTML_WHITESPACE.indexOf(c) != -1) {
+ seenWhitespace = true;
+ } else {
+ if (seenWhitespace) {
+ output.append(' ');
+ }
+ output.append(c);
+
+ seenWhitespace = false;
+ }
+ }
+ }
+}
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java?rev=767733&r1=767732&r2=767733&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java Thu Apr 23 00:08:55 2009
@@ -49,12 +49,14 @@
this.documentProvider = documentProvider;
}
+ protected HtmlSerializer createSerializer() {
+ return new NekoSerializer();
+ }
+
@Override
public Document parseDomImpl(String source) throws GadgetException {
try {
- Document document = parseDomInternal(source);
- HtmlSerializer.attach(document, new NekoSerializer(), source);
- return document;
+ return parseDomInternal(source);
} catch (Exception e) {
throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e);
}
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java?rev=767733&r1=767732&r2=767733&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSerializer.java Thu Apr 23 00:08:55 2009
@@ -21,7 +21,6 @@
import org.apache.shindig.gadgets.parse.HtmlSerializer;
import org.apache.xerces.xni.QName;
import org.cyberneko.html.HTMLElements;
-import org.cyberneko.html.HTMLEntities;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
@@ -40,12 +39,9 @@
* This parser does not try to escape entities in text content as it expects the parser
* to have retained the original entity references rather than its resolved form in text nodes
*/
-public class NekoSerializer extends HtmlSerializer
-{
+public class NekoSerializer extends HtmlSerializer {
+
private static final Set<String> URL_ATTRIBUTES = ImmutableSet.of("href", "src");
-
- public NekoSerializer() {
- }
@Override
public String serializeImpl(Document doc) {
@@ -54,25 +50,25 @@
if (doc.getDoctype() != null) {
outputDocType(doc.getDoctype(), sw);
}
- serialize(doc, sw);
+ this.serialize(doc, sw);
return sw.toString();
} catch (IOException ioe) {
return null;
}
}
- public static void serialize(Node n, Appendable output) throws IOException {
+ public void serialize(Node n, Appendable output) throws IOException {
serialize(n, output, false);
}
- private static void serialize(Node n, Appendable output, boolean xmlMode)
+ private void serialize(Node n, Appendable output, boolean xmlMode)
throws IOException {
switch (n.getNodeType()) {
case Node.CDATA_SECTION_NODE: {
break;
}
case Node.COMMENT_NODE: {
- output.append("<!--").append(n.getNodeValue()).append("-->");
+ writeComment(n, output);
break;
}
case Node.DOCUMENT_NODE: {
@@ -104,13 +100,21 @@
break;
}
case Node.TEXT_NODE: {
- output.append(n.getTextContent());
+ writeText(n, output);
break;
}
}
}
- public static void outputDocType(DocumentType docType, Appendable output) throws IOException {
+ protected void writeText(Node n, Appendable output) throws IOException {
+ output.append(n.getTextContent());
+ }
+
+ protected void writeComment(Node n, Appendable output) throws IOException {
+ output.append("<!--").append(n.getNodeValue()).append("-->");
+ }
+
+ private void outputDocType(DocumentType docType, Appendable output) throws IOException {
output.append("<!DOCTYPE ");
// Use this so name matches case for XHTML
output.append(docType.getOwnerDocument().getDocumentElement().getNodeName());
@@ -126,13 +130,6 @@
}
/**
- * Print the start of an HTML element.
- */
- public static void printStartElement(Element elem, Appendable output) throws IOException {
- printStartElement(elem, output, false);
- }
-
- /**
* Print the start of an HTML element. If withXmlClose==true, this is an
* empty element that should have its content
*/
@@ -157,7 +154,7 @@
output.append(withXmlClose ? "/>" : ">");
}
- public static void printAttributeValue(String text, Appendable output, boolean isUrl) throws IOException {
+ private static void printAttributeValue(String text, Appendable output, boolean isUrl) throws IOException {
int length = text.length();
for (int j = 0; j < length; j++) {
char c = text.charAt(j);
@@ -171,22 +168,10 @@
}
}
- public static void printEscapedText(CharSequence text, Appendable output) throws IOException {
- for (int i = 0; i < text.length(); i++) {
- char c = text.charAt(i);
- String entity = HTMLEntities.get(c);
- if (entity != null) {
- output.append('&').append(entity).append(";");
- } else {
- output.append(c);
- }
- }
- }
-
/**
* Returns true if the listed attribute is an URL attribute.
*/
- public static boolean isUrlAttribute(QName name, String attributeName) {
+ static boolean isUrlAttribute(QName name, String attributeName) {
return name.uri == null && URL_ATTRIBUTES.contains(attributeName);
}
}
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=767733&r1=767732&r2=767733&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java Thu Apr 23 00:08:55 2009
@@ -71,6 +71,10 @@
this.documentFactory = documentFactory;
}
+ protected HtmlSerializer createSerializer() {
+ return new NekoSerializer();
+ }
+
@Override
protected Document parseDomImpl(String source) {
DocumentHandler handler;
@@ -84,7 +88,6 @@
Document document = handler.getDocument();
DocumentFragment fragment = handler.getFragment();
normalizeFragment(document, fragment);
- HtmlSerializer.attach(document, new NekoSerializer(), source);
return document;
}
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/AbstractTagHandler.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/AbstractTagHandler.java?rev=767733&r1=767732&r2=767733&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/AbstractTagHandler.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/AbstractTagHandler.java Thu Apr 23 00:08:55 2009
@@ -18,7 +18,7 @@
*/
package org.apache.shindig.gadgets.templates;
-import org.apache.shindig.gadgets.parse.nekohtml.NekoSerializer;
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
@@ -74,7 +74,7 @@
try {
StringBuilder sb = new StringBuilder(text.length());
- NekoSerializer.printEscapedText(text, sb);
+ HtmlSerializer.printEscapedText(text, sb);
parent.appendChild(parent.getOwnerDocument().createTextNode(sb.toString()));
} catch (IOException ioe) {
throw new RuntimeException(ioe);
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/DefaultTemplateProcessor.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/DefaultTemplateProcessor.java?rev=767733&r1=767732&r2=767733&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/DefaultTemplateProcessor.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/DefaultTemplateProcessor.java Thu Apr 23 00:08:55 2009
@@ -20,7 +20,7 @@
import org.apache.shindig.expressions.Expressions;
import org.apache.shindig.gadgets.GadgetELResolver;
-import org.apache.shindig.gadgets.parse.nekohtml.NekoSerializer;
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
import java.io.IOException;
import java.util.Iterator;
@@ -204,7 +204,7 @@
// And now escape
outputBuffer.setLength(0);
try {
- NekoSerializer.printEscapedText(value, outputBuffer);
+ HtmlSerializer.printEscapedText(value, outputBuffer);
} catch (IOException e) {
// Can't happen writing to StringBuilder
throw new RuntimeException(e);
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/HtmlTagHandler.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/HtmlTagHandler.java?rev=767733&r1=767732&r2=767733&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/HtmlTagHandler.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/templates/HtmlTagHandler.java Thu Apr 23 00:08:55 2009
@@ -20,7 +20,7 @@
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
-import org.apache.shindig.gadgets.parse.nekohtml.NekoSerializer;
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
@@ -56,7 +56,7 @@
} catch (GadgetException ge) {
try {
StringBuilder sb = new StringBuilder("Error: ");
- NekoSerializer.printEscapedText(ge.getMessage(), sb);
+ HtmlSerializer.printEscapedText(ge.getMessage(), sb);
Node comment = result.getOwnerDocument().createComment(sb.toString());
result.appendChild(comment);
} catch (IOException e) {
Added: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/AbstractParserAndSerializerTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/AbstractParserAndSerializerTest.java?rev=767733&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/AbstractParserAndSerializerTest.java (added)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/AbstractParserAndSerializerTest.java Thu Apr 23 00:08:55 2009
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.shindig.gadgets.parse.nekohtml;
+
+import junit.framework.TestCase;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
+import org.w3c.dom.Document;
+
+import java.io.IOException;
+
+/**
+ * Base test fixture for HTML parsing and serialization.
+ */
+abstract class AbstractParserAndSerializerTest extends TestCase {
+
+ /** The vm line separator */
+ private static final String EOL = System.getProperty("line.separator");
+
+ String loadFile(String path) throws IOException {
+ return IOUtils.toString(this.getClass().getClassLoader().
+ getResourceAsStream(path));
+ }
+
+ void parseAndCompareBalanced(String content, String expected, GadgetHtmlParser parser)
+ throws Exception {
+ Document document = parser.parseDom(content);
+ expected = StringUtils.replace(expected, EOL, "\n");
+ assertEquals(expected, HtmlSerializer.serialize(document));
+ }
+}
Added: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoCompactSerializerTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoCompactSerializerTest.java?rev=767733&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoCompactSerializerTest.java (added)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoCompactSerializerTest.java Thu Apr 23 00:08:55 2009
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.shindig.gadgets.parse.nekohtml;
+
+import org.apache.shindig.gadgets.parse.ParseModule;
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
+
+import java.io.StringWriter;
+import java.io.IOException;
+
+/**
+ * Test cases for NekoCompactSerializer.
+ */
+public class NekoCompactSerializerTest extends AbstractParserAndSerializerTest {
+
+ private NekoHtmlParser full = new NekoHtmlParser(
+ new ParseModule.DOMImplementationProvider().get()) {
+ @Override
+ protected HtmlSerializer createSerializer() {
+ return new NekoCompactSerializer();
+ }
+ };
+
+ public void testWhitespaceNotCollapsedInSpecialTags() throws Exception {
+ String content = loadFile( // raw input fixture, distinct from the expected output below
+ "org/apache/shindig/gadgets/parse/nekohtml/test-with-specialtags.html");
+ String expected = loadFile(
+ "org/apache/shindig/gadgets/parse/nekohtml/test-with-specialtags-expected.html");
+ parseAndCompareBalanced(content, expected, full);
+ }
+
+ public void testIeConditionalCommentNotRemoved() throws Exception {
+ String content = loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-with-iecond-comments.html");
+ String expected = loadFile(
+ "org/apache/shindig/gadgets/parse/nekohtml/test-with-iecond-comments-expected.html");
+ parseAndCompareBalanced(content, expected, full);
+ }
+
+ public void testSpecialTagsAreRecognized() {
+ assertSpecialTag("textArea");
+ assertSpecialTag("scrIpt");
+ assertSpecialTag("Style");
+ assertSpecialTag("pRe");
+ }
+
+ private static void assertSpecialTag(String tagName) { // checks the name as given, upper-cased and lower-cased
+ assertTrue(tagName + " should be special tag",
+ NekoCompactSerializer.isSpecialTag(tagName));
+ assertTrue(tagName.toUpperCase() + " should be special tag",
+ NekoCompactSerializer.isSpecialTag(tagName.toUpperCase()));
+ assertTrue(tagName.toLowerCase() + " should be special tag",
+ NekoCompactSerializer.isSpecialTag(tagName.toLowerCase()));
+ }
+
+ public void testCollapseHtmlWhitespace() throws IOException {
+ assertCollapsed("abc", "abc");
+ assertCollapsed("abc ", "abc");
+ assertCollapsed(" abc", "abc");
+ assertCollapsed(" abc", "abc");
+ assertCollapsed("abc \r", "abc");
+ assertCollapsed("a\t bc", "a bc");
+ assertCollapsed("a b\n\r c", "a b c");
+ assertCollapsed(" \ra \tb \n c ", "a b c");
+ assertCollapsed(" \n\t\r ", "");
+ }
+
+ private static void assertCollapsed(String input, String expected) throws IOException {
+ Appendable output = new StringWriter();
+ NekoCompactSerializer.collapseWhitespace(input, output);
+ assertEquals(expected, output.toString());
+ }
+}
Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java?rev=767733&r1=767732&r2=767733&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoParserAndSerializeTest.java Thu Apr 23 00:08:55 2009
@@ -17,22 +17,12 @@
*/
package org.apache.shindig.gadgets.parse.nekohtml;
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang.StringUtils;
-import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
-import org.apache.shindig.gadgets.parse.HtmlSerializer;
import org.apache.shindig.gadgets.parse.ParseModule;
-import junit.framework.TestCase;
-import org.w3c.dom.Document;
-
/**
* Test behavior of neko based parser and serializers
*/
-public class NekoParserAndSerializeTest extends TestCase {
-
- /** The vm line separator */
- private static final String EOL = System.getProperty( "line.separator" );
+public class NekoParserAndSerializeTest extends AbstractParserAndSerializerTest {
private NekoSimplifiedHtmlParser simple = new NekoSimplifiedHtmlParser(
new ParseModule.DOMImplementationProvider().get());
@@ -41,47 +31,33 @@
public void testDocWithDoctype() throws Exception {
// Note that doctype is properly retained
- String content = IOUtils.toString(this.getClass().getClassLoader().
- getResourceAsStream("org/apache/shindig/gadgets/parse/nekohtml/test.html"));
- String expected = IOUtils.toString(this.getClass().getClassLoader().
- getResourceAsStream("org/apache/shindig/gadgets/parse/nekohtml/test-expected.html"));
+ String content = loadFile("org/apache/shindig/gadgets/parse/nekohtml/test.html");
+ String expected = loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-expected.html");
parseAndCompareBalanced(content, expected, full);
parseAndCompareBalanced(content, expected, simple);
}
public void testDocNoDoctype() throws Exception {
// Note that no doctype is properly created when none specified
- String content = IOUtils.toString(this.getClass().getClassLoader().
- getResourceAsStream("org/apache/shindig/gadgets/parse/nekohtml/test-fulldocnodoctype.html"));
+ String content = loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-fulldocnodoctype.html");
assertNull(full.parseDom(content).getDoctype());
assertNull(simple.parseDom(content).getDoctype());
}
public void testNotADocument() throws Exception {
// Note that no doctype is injected for fragments
- String content = IOUtils.toString(this.getClass().getClassLoader().
- getResourceAsStream("org/apache/shindig/gadgets/parse/nekohtml/test-fragment.html"));
- String expected = IOUtils.toString(this.getClass().getClassLoader().
- getResourceAsStream("org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html"));
+ String content = loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-fragment.html");
+ String expected = loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-fragment-expected.html");
parseAndCompareBalanced(content, expected, full);
parseAndCompareBalanced(content, expected, simple);
}
public void testNoBody() throws Exception {
// Note that no doctype is injected for fragments
- String content = IOUtils.toString(this.getClass().getClassLoader().
- getResourceAsStream("org/apache/shindig/gadgets/parse/nekohtml/test-headnobody.html"));
- String expected = IOUtils.toString(this.getClass().getClassLoader().
- getResourceAsStream(
- "org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html"));
+ String content = loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-headnobody.html");
+ String expected = loadFile("org/apache/shindig/gadgets/parse/nekohtml/test-headnobody-expected.html");
parseAndCompareBalanced(content, expected, full);
parseAndCompareBalanced(content, expected, simple);
}
- private void parseAndCompareBalanced(String content, String expected, GadgetHtmlParser parser)
- throws Exception {
- Document document = parser.parseDom(content);
- expected = StringUtils.replace(expected, EOL, "\n");
- assertEquals(expected, HtmlSerializer.serialize(document));
- }
}
Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java?rev=767733&r1=767732&r2=767733&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/SocialMarkupHtmlParserTest.java Thu Apr 23 00:08:55 2009
@@ -24,6 +24,7 @@
import org.apache.commons.io.IOUtils;
import org.apache.shindig.gadgets.parse.ParseModule;
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
import org.apache.shindig.gadgets.spec.PipelinedData;
import org.junit.Before;
import org.junit.Test;
@@ -87,7 +88,7 @@
@Test
public void testSocialTemplateSerialization() {
- String content = NekoSerializer.serialize(document);
+ String content = HtmlSerializer.serialize(document);
assertTrue("Empty elements not preserved as XML inside template",
content.contains("<img/>"));
}
Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/DefaultTemplateProcessorTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/DefaultTemplateProcessorTest.java?rev=767733&r1=767732&r2=767733&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/DefaultTemplateProcessorTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/DefaultTemplateProcessorTest.java Thu Apr 23 00:08:55 2009
@@ -215,7 +215,7 @@
NodeList children = node.getChildNodes();
for (int i = 0; i < children.getLength(); i++) {
Node child = children.item(i);
- NekoSerializer.serialize(child, sb);
+ new NekoSerializer().serialize(child, sb);
}
return sb.toString();
}
Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/RenderTagHandlerTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/RenderTagHandlerTest.java?rev=767733&r1=767732&r2=767733&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/RenderTagHandlerTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/RenderTagHandlerTest.java Thu Apr 23 00:08:55 2009
@@ -118,7 +118,7 @@
NodeList children = node.getChildNodes();
for (int i = 0; i < children.getLength(); i++) {
Node child = children.item(i);
- NekoSerializer.serialize(child, sb);
+ new NekoSerializer().serialize(child, sb);
}
return sb.toString();
}
Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/TemplateBasedTagHandlerTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/TemplateBasedTagHandlerTest.java?rev=767733&r1=767732&r2=767733&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/TemplateBasedTagHandlerTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/templates/TemplateBasedTagHandlerTest.java Thu Apr 23 00:08:55 2009
@@ -172,7 +172,7 @@
NodeList children = node.getChildNodes();
for (int i = 0; i < children.getLength(); i++) {
Node child = children.item(i);
- NekoSerializer.serialize(child, sb);
+ new NekoSerializer().serialize(child, sb);
}
return sb.toString();
}
Added: incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-iecond-comments-expected.html
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-iecond-comments-expected.html?rev=767733&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-iecond-comments-expected.html (added)
+++ incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-iecond-comments-expected.html Thu Apr 23 00:08:55 2009
@@ -0,0 +1,4 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html><head id="head"><link href="http://www.example.org/css.css" rel="stylesheet" type="text/css"><title>An example</title></head><body><!--[if IE 5]>
+ <p>Welcome to Internet Explorer 5.</p>
+ <![endif]--><!--[if IE]><p>You are using Internet Explorer.</p><![endif]--><!--[if !IE]><p>You are not using Internet Explorer.</p><![endif]--><!--[if IE 7]><p>Welcome to Internet Explorer 7!</p><![endif]--><!--[if !(IE 7)]><p>You are not using version 7.</p><![endif]--><!--[if gte IE 7]><p>You are using IE 7 or greater.</p><![endif]--><!--[if (IE 5)]><p>You are using IE 5 (any version).</p><![endif]--><!--[if (gte IE 5.5)&(lt IE 7)]><p>You are using IE 5.5 or IE 6.</p><![endif]--><!--[if lt IE 5.5]><p>Please upgrade your version of Internet Explorer.</p><![endif]--><!--[if true]>You are using an <em>uplevel</em> browser.<![endif]--><!--[if false]>You are using a <em>downlevel</em> browser.<![endif]--><!--[if true]><![if IE 7]><p>This nested comment is displayed in IE 7.</p><![endif]><![endif]--></body></html>
\ No newline at end of file
Added: incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-iecond-comments.html
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-iecond-comments.html?rev=767733&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-iecond-comments.html (added)
+++ incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-iecond-comments.html Thu Apr 23 00:08:55 2009
@@ -0,0 +1,30 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head id="head">
+ <link href="http://www.example.org/css.css" rel="stylesheet" type="text/css">
+ <title>An example</title>
+</head>
+<body>
+ <!--[if IE 5]>
+ <p>Welcome to Internet Explorer 5.</p>
+ <![endif]-->
+
+ <!--[if IE]><p>You are using Internet Explorer.</p><![endif]-->
+ <!--[if !IE]><p>You are not using Internet Explorer.</p><![endif]-->
+
+ <!--[if IE 7]><p>Welcome to Internet Explorer 7!</p><![endif]-->
+ <!--[if !(IE 7)]><p>You are not using version 7.</p><![endif]-->
+
+ <!--[if gte IE 7]><p>You are using IE 7 or greater.</p><![endif]-->
+ <!--[if (IE 5)]><p>You are using IE 5 (any version).</p><![endif]-->
+ <!--[if (gte IE 5.5)&(lt IE 7)]><p>You are using IE 5.5 or IE 6.</p><![endif]-->
+ <!--[if lt IE 5.5]><p>Please upgrade your version of Internet Explorer.</p><![endif]-->
+
+ <!--[if true]>You are using an <em>uplevel</em> browser.<![endif]-->
+ <!--[if false]>You are using a <em>downlevel</em> browser.<![endif]-->
+
+ <!--[if true]><![if IE 7]><p>This nested comment is displayed in IE 7.</p><![endif]><![endif]-->
+
+ <!-- this standard comment should be removed -->
+</body>
+</html>
\ No newline at end of file
Added: incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-specialtags-expected.html
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-specialtags-expected.html?rev=767733&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-specialtags-expected.html (added)
+++ incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-specialtags-expected.html Thu Apr 23 00:08:55 2009
@@ -0,0 +1,31 @@
+<html><head id="head"><link href="http://www.example.org/css.css" rel="stylesheet" type="text/css"><title>An example</title></head><body><style type="text/css">
+ <!--
+ #mymap #header {
+ background:#FF9700;
+ clear:both;
+ padding:2px 0 1px;
+ position:relative;
+ width:640px;
+ }
+ -->
+ </style><script type="text/javascript">document.write("&&&")</script><script src="http://www.example.org/1.js" type="text/javascript"></script><script>
+ // scripts with no old comment hack should be preserved.
+ function a1() {
+ var v1 = 0;
+ alert(" this whitespace should be preserved.");
+ }
+ </script><div><table><tr><td>a cell</td></tr></table></div><script type="text/javascript">
+ <!--
+ // script with old comment hack should be preserved.
+function MM_goToURL() {
+ var i, args=MM_goToURL.arguments; document.MM_returnValue = false;
+ for (i=0; i<(args.length-1); i+=2) eval(args[i]+".location='"+args[i+1]+"'");
+}
+//-->
+ </script><p>Lorem ipsum</p><a href="/test.html" title="">link</a><pre>
+ This is a preformatted block of text,
+ and whitespaces should be preserved.
+ </pre><form action="/test/submit"><div><input type="hidden" value="something"><input type="text"><textarea>
+ This is a preformatted block of text,
+ and whitespaces should be preserved too.
+</textarea></div></form></body></html>
\ No newline at end of file
Added: incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-specialtags.html
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-specialtags.html?rev=767733&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-specialtags.html (added)
+++ incubator/shindig/trunk/java/gadgets/src/test/resources/org/apache/shindig/gadgets/parse/nekohtml/test-with-specialtags.html Thu Apr 23 00:08:55 2009
@@ -0,0 +1,55 @@
+<html>
+ <head>
+ <title>An example</title>
+ </head>
+ <body>
+ <style type="text/css">
+ <!--
+ #mymap #header {
+ background:#FF9700;
+ clear:both;
+ padding:2px 0 1px;
+ position:relative;
+ width:640px;
+ }
+ -->
+ </style>
+ <script type="text/javascript">document.write("&&&")</script>
+ <script src="http://www.example.org/1.js" type="text/javascript"></script>
+ <script>
+ // scripts with no old comment hack should be preserved.
+ function a1() {
+ var v1 = 0;
+ alert(" this whitespace should be preserved.");
+ }
+ </script>
+ <div>
+ <table><tr><td>a cell</td></tr></table>
+ </div>
+ <script type="text/javascript">
+ <!--
+ // script with old comment hack should be preserved.
+ function MM_goToURL() {
+ var i, args=MM_goToURL.arguments; document.MM_returnValue = false;
+ for (i=0; i<(args.length-1); i+=2) eval(args[i]+".location='"+args[i+1]+"'");
+ }
+ //-->
+ </script>
+ <p>Lorem ipsum</p>
+ <a href="/test.html" title="">link</a>
+ <pre>
+ This is a preformatted block of text,
+ and whitespaces should be preserved.
+ </pre>
+ <form action="/test/submit">
+ <div>
+ <input type="hidden" value="something">
+ <input type="text"/>
+ <textarea>
+ This is a preformatted block of text,
+ and whitespaces should be preserved too.
+ </textarea>
+</div>
+ </form>
+ </body>
+ </html>
\ No newline at end of file