You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@shindig.apache.org by lr...@apache.org on 2008/11/03 22:41:25 UTC
svn commit: r710176 - in /incubator/shindig/trunk/java/gadgets/src:
main/java/org/apache/shindig/gadgets/parse/
main/java/org/apache/shindig/gadgets/parse/caja/
main/java/org/apache/shindig/gadgets/parse/nekohtml/
main/java/org/apache/shindig/gadgets/r...
Author: lryan
Date: Mon Nov 3 13:41:23 2008
New Revision: 710176
URL: http://svn.apache.org/viewvc?rev=710176&view=rev
Log:
Introduce light-weight Neko parser with a simplified DOM
Abstracted serialization mechanism to provider for parser specific artifacts
Added rewriter benchmarks
Various fixes to existing DOM based content rewriters
Added:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/HtmlSerializer.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/test.html
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParserTest.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LexerVsDomRewriteBenchmark.java
Modified:
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/ParseModule.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/CajaHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/HtmlContentRewriter.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriter.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriter.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java
incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriter.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriterTest.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriterTest.java
incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriterTest.java
Added: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/HtmlSerializer.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/HtmlSerializer.java?rev=710176&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/HtmlSerializer.java (added)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/HtmlSerializer.java Mon Nov 3 13:41:23 2008
@@ -0,0 +1,81 @@
+package org.apache.shindig.gadgets.parse;
+
+import org.w3c.dom.Document;
+
+import java.io.StringWriter;
+
+/**
+ * Serialize a w3c document. An implementation of this interface should be bound
+ * to the document produced by an implementor of HtmlParser and retrieveable via
+ * document.getUserData(HtmlSerializer.KEY)
+ */
+public abstract class HtmlSerializer {
+
+ /**
+ * Used to key an instance of HtmlSerializer in
+ * document.getUserData
+ */
+ private static final String KEY = "serializer";
+
+ /**
+ * Used by a parser to record the original length of the content it parsed
+ * Can be used to optimize output buffers
+ */
+ private static final String ORIGINAL_LENGTH = "original-length";
+
+ /**
+ * Attach a serializer instance to the document
+ * @param doc
+ * @param serializer
+ * @param originalContent may be null
+ */
+ public static void attach(Document doc, HtmlSerializer serializer, String originalContent) {
+ doc.setUserData(KEY, serializer, null);
+ if (originalContent != null) {
+ doc.setUserData(ORIGINAL_LENGTH, originalContent.length(), null);
+ }
+ }
+
+ /**
+ * Get the length of the original version of the document
+ * @param doc
+ * @return
+ */
+ protected static int getOriginalLength(Document doc) {
+ Integer length = (Integer)doc.getUserData(ORIGINAL_LENGTH);
+ if (length == null) return -1;
+ return length;
+ }
+
+ /**
+ * Create a writer sized to the original length of the document
+ * @param doc
+ * @return
+ */
+ protected static StringWriter createWriter(Document doc) {
+ int originalLength = getOriginalLength(doc);
+ if (originalLength == -1) {
+ return new StringWriter(8192);
+ } else {
+ // Typically rewriting makes a document larger
+ return new StringWriter((originalLength * 11) / 10);
+ }
+ }
+
+ /**
+ * Call the attached serializer and output the document
+ * @param doc
+ * @return
+ */
+ public static String serialize(Document doc) {
+ return ((HtmlSerializer)doc.getUserData(KEY)).serializeImpl(doc);
+ }
+
+ /**
+ * Overridden by implementations
+ * @param doc
+ * @return
+ */
+ protected abstract String serializeImpl(Document doc);
+
+}
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/ParseModule.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/ParseModule.java?rev=710176&r1=710175&r2=710176&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/ParseModule.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/ParseModule.java Mon Nov 3 13:41:23 2008
@@ -17,12 +17,11 @@
*/
package org.apache.shindig.gadgets.parse;
-import org.apache.shindig.gadgets.parse.caja.CajaHtmlParser;
-
import com.google.inject.AbstractModule;
import com.google.inject.Provider;
-
-import org.w3c.dom.html.HTMLDocument;
+import org.apache.shindig.gadgets.parse.caja.CajaHtmlParser;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.bootstrap.DOMImplementationRegistry;
/**
* Provide parse bindings
@@ -36,36 +35,47 @@
protected void configure() {
//bind(GadgetHtmlParser.class).to(NekoHtmlParser.class);
bind(GadgetHtmlParser.class).to(CajaHtmlParser.class);
- bind(HTMLDocument.class).toProvider(HTMLDocumentProvider.class);
+ bind(DOMImplementation.class).toProvider(DOMImplementationProvider.class);
}
/**
* Provider of new HTMLDocument implementations. Used to hide XML parser weirdness
*/
- public static class HTMLDocumentProvider implements Provider<HTMLDocument> {
+ public static class DOMImplementationProvider implements Provider<DOMImplementation> {
- Class htmlDocImpl;
+ // Resolved once in the constructor and returned unchanged by get()
+ DOMImplementation domImpl;
- public HTMLDocumentProvider() {
+ public DOMImplementationProvider() {
+ try {
+ DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
+ // Require the traversal API
+ domImpl = registry.getDOMImplementation("XML 1.0 Traversal 2.0");
+ } catch (Exception e) {
+ // Try another
+ }
// This is ugly but effective
try {
- htmlDocImpl = Class.forName("org.apache.html.dom.HTMLDocumentImpl");
- } catch (ClassNotFoundException cnfe) {
- try {
- htmlDocImpl = Class.forName("com.sun.org.apache.html.internal.dom.HTMLDocumentImpl");
- } catch (ClassNotFoundException cnfe2) {
- throw new RuntimeException("Could not find HTML DOM implementation", cnfe2);
+ if (domImpl == null) {
+ // Stock Apache Xerces keeps this class in org.apache.xerces.dom; only the
+ // JDK-bundled copy (tried last, below) lives under an "internal" package.
+ domImpl = (DOMImplementation)
+ Class.forName("org.apache.xerces.dom.DOMImplementationImpl").
+ getMethod("getDOMImplementation").invoke(null);
}
+ } catch (Exception ex) {
+ //try another
}
- }
-
- public HTMLDocument get() {
try {
- return (HTMLDocument) htmlDocImpl.newInstance();
- } catch (Exception e) {
- throw new RuntimeException("Could not create HTML DOM from class "
- + htmlDocImpl.getName(), e);
+ if (domImpl == null) {
+ domImpl = (DOMImplementation)
+ Class.forName("com.sun.org.apache.xerces.internal.dom.DOMImplementationImpl").
+ getMethod("getDOMImplementation").invoke(null);
+ }
+ } catch (Exception ex) {
+ throw new RuntimeException("Could not find HTML DOM implementation", ex);
}
}
+
+ public DOMImplementation get() {
+ return domImpl;
+ }
}
}
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/CajaHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/CajaHtmlParser.java?rev=710176&r1=710175&r2=710176&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/CajaHtmlParser.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/CajaHtmlParser.java Mon Nov 3 13:41:23 2008
@@ -23,17 +23,17 @@
import com.google.caja.reporting.MessageQueue;
import com.google.caja.reporting.SimpleMessageQueue;
import com.google.inject.Inject;
-import com.google.inject.Provider;
import com.google.inject.Singleton;
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
-import org.w3c.dom.Attr;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Node;
-import org.w3c.dom.html.HTMLDocument;
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
+import org.apache.xml.serialize.HTMLSerializer;
+import org.apache.xml.serialize.OutputFormat;
+import org.w3c.dom.*;
+import java.io.IOException;
import java.io.StringReader;
+import java.io.StringWriter;
import java.net.URI;
import java.net.URISyntaxException;
@@ -43,17 +43,19 @@
@Singleton
public class CajaHtmlParser extends GadgetHtmlParser {
- Provider<HTMLDocument> documentProvider;
+ private final DOMImplementation documentProvider;
@Inject
- public CajaHtmlParser(Provider<HTMLDocument> documentProvider) {
+ public CajaHtmlParser(DOMImplementation documentProvider) {
this.documentProvider = documentProvider;
}
@Override
public Document parseDom(String source) throws GadgetException {
// Wrap the whole thing in a top-level node to get full contents.
- return makeDocument(getFragment(source));
+ Document document = makeDocument(getFragment(source));
+ HtmlSerializer.attach(document, new Serializer(), source);
+ return document;
}
DomTree.Fragment getFragment(String content) throws GadgetException {
@@ -87,8 +89,8 @@
return new DomParser(new TokenQueue<HtmlTokenType>(lexer, source), false, mQueue);
}
- private HTMLDocument makeDocument(DomTree.Fragment fragment) {
- HTMLDocument htmlDocument = documentProvider.get();
+ private Document makeDocument(DomTree.Fragment fragment) {
+ Document htmlDocument = documentProvider.createDocument(null, null, null);
// Check if doc contains an HTML node. If so just add it and recurse
for (DomTree node : fragment.children()) {
@@ -105,7 +107,7 @@
return htmlDocument;
}
- private static void recurseDocument(HTMLDocument doc, Node parent, DomTree elem) {
+ private static void recurseDocument(Document doc, Node parent, DomTree elem) {
if (elem instanceof DomTree.Tag) {
DomTree.Tag tag = (DomTree.Tag) elem;
Element element = doc.createElement(tag.getTagName());
@@ -127,4 +129,24 @@
// TODO Implement for comment, fragment etc...
}
}
+
+ /**
+  * Serializer attached to documents produced by this parser. Writes the
+  * document through an HTMLSerializer into a writer obtained from createWriter.
+  */
+ static class Serializer extends HtmlSerializer {
+
+   // Shared output settings: whitespace preserved, empty attributes not preserved
+   static final OutputFormat outputFormat = new OutputFormat();
+   static {
+     outputFormat.setPreserveSpace(true);
+     outputFormat.setPreserveEmptyAttributes(false);
+   }
+
+   /**
+    * @param doc document to serialize
+    * @return the serialized text, or null if serialization raises an IOException
+    */
+   public String serializeImpl(Document doc) {
+     StringWriter out = createWriter(doc);
+     try {
+       new HTMLSerializer(out, outputFormat).serialize(doc);
+     } catch (IOException ioe) {
+       return null;
+     }
+     return out.toString();
+   }
+ }
}
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java?rev=710176&r1=710175&r2=710176&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java Mon Nov 3 13:41:23 2008
@@ -18,20 +18,23 @@
package org.apache.shindig.gadgets.parse.nekohtml;
import com.google.inject.Inject;
-import com.google.inject.Provider;
import org.apache.shindig.common.xml.XmlUtil;
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
+import org.apache.xml.serialize.HTMLSerializer;
+import org.apache.xml.serialize.OutputFormat;
import org.cyberneko.html.parsers.DOMFragmentParser;
+import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Node;
-import org.w3c.dom.html.HTMLDocument;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.io.StringReader;
+import java.io.StringWriter;
/**
* Parser that uses the NekoHtml parser.
@@ -43,17 +46,19 @@
*/
public class NekoHtmlParser extends GadgetHtmlParser {
- Provider<HTMLDocument> documentProvider;
+ private final DOMImplementation documentProvider;
@Inject
- public NekoHtmlParser(Provider<HTMLDocument> documentProvider) {
+ public NekoHtmlParser(DOMImplementation documentProvider) {
this.documentProvider = documentProvider;
}
@Override
public Document parseDom(String source) throws GadgetException {
try {
- return parseFragment(source);
+ Document document = parseFragment(source);
+ HtmlSerializer.attach(document, new Serializer(), source);
+ return document;
} catch (Exception e) {
throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e);
}
@@ -63,7 +68,7 @@
InputSource input = new InputSource(new StringReader(source));
DOMFragmentParser parser = new DOMFragmentParser();
- HTMLDocument htmlDoc = documentProvider.get();
+ Document htmlDoc = documentProvider.createDocument(null, null, null);
DocumentFragment fragment = htmlDoc.createDocumentFragment();
parser.parse(input, fragment);
Node htmlNode = XmlUtil.getFirstNamedChildNode(fragment, "HTML");
@@ -75,4 +80,24 @@
}
return htmlDoc;
}
+
+ /**
+  * Serializer attached to documents produced by this parser. Writes the
+  * document through an HTMLSerializer into a writer obtained from createWriter.
+  */
+ static class Serializer extends HtmlSerializer {
+
+   // Shared output settings: whitespace preserved, empty attributes not preserved
+   static final OutputFormat outputFormat = new OutputFormat();
+   static {
+     outputFormat.setPreserveSpace(true);
+     outputFormat.setPreserveEmptyAttributes(false);
+   }
+
+   /**
+    * @param doc document to serialize
+    * @return the serialized text, or null if serialization raises an IOException
+    */
+   public String serializeImpl(Document doc) {
+     StringWriter out = createWriter(doc);
+     try {
+       new HTMLSerializer(out, outputFormat).serialize(doc);
+     } catch (IOException ioe) {
+       return null;
+     }
+     return out.toString();
+   }
+ }
}
Added: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java?rev=710176&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java (added)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParser.java Mon Nov 3 13:41:23 2008
@@ -0,0 +1,351 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package org.apache.shindig.gadgets.parse.nekohtml;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
+import com.google.inject.Inject;
+import org.apache.shindig.common.xml.XmlUtil;
+import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
+import org.apache.xerces.xni.*;
+import org.apache.xerces.xni.parser.XMLDocumentSource;
+import org.apache.xerces.xni.parser.XMLInputSource;
+import org.apache.xml.serialize.HTMLSerializer;
+import org.apache.xml.serialize.OutputFormat;
+import org.cyberneko.html.HTMLEventInfo;
+import org.cyberneko.html.HTMLScanner;
+import org.cyberneko.html.HTMLTagBalancer;
+import org.w3c.dom.*;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.util.List;
+import java.util.Set;
+import java.util.Stack;
+
+/**
+ * Creates a greatly simplified DOM model that contains elements for only the specified
+ * element set and creates unescaped text nodes for all other content.
+ * It requires special serialization to prevent escaping of text nodes but behaves like a
+ * regular DOM in all other respects. Only element types which are produced are balanced.
+ */
+public class NekoSimplifiedHtmlParser extends GadgetHtmlParser {
+
+  /** Lower-case names of the only tags that are turned into DOM elements. */
+  private static final Set<String> elements =
+      ImmutableSet.of("html", "body", "head", "link", "img", "style", "script", "embed");
+
+  /**
+   * Neko feature id. Enabled on the scanner and balancer in parseDom and also used as
+   * the key under which the per-event HTMLEventInfo is read from the augmentations.
+   */
+  private static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
+
+  private final DOMImplementation documentFactory;
+
+  @Inject
+  public NekoSimplifiedHtmlParser(DOMImplementation documentFactory) {
+    this.documentFactory = documentFactory;
+  }
+
+  /**
+   * Scans the source with Neko and builds the simplified document. The result is rooted
+   * at the parsed HTML element when one exists, otherwise at a newly created HTML element
+   * wrapping the parsed fragment. A Serializer is attached to the returned document.
+   *
+   * @param source HTML text to parse
+   * @return the simplified document, or null if scanning raises an IOException
+   */
+  @Override
+  public Document parseDom(String source) {
+
+    HTMLScanner htmlScanner = new HTMLScanner();
+    HTMLTagBalancer tagBalancer = new HTMLTagBalancer();
+    DocumentHandler handler = new DocumentHandler(source);
+    tagBalancer.setDocumentHandler(handler);
+    htmlScanner.setDocumentHandler(tagBalancer);
+    tagBalancer.setFeature(AUGMENTATIONS, true);
+    htmlScanner.setFeature(AUGMENTATIONS, true);
+
+    XMLInputSource inputSource = new XMLInputSource(null, null, null);
+    inputSource.setEncoding("UTF-8");
+    inputSource.setCharacterStream(new StringReader(source));
+    try {
+      htmlScanner.setInputSource(inputSource);
+      htmlScanner.scanDocument(true);
+      Document document = handler.getDocument();
+      DocumentFragment fragment = handler.getFragment();
+      Node htmlNode = XmlUtil.getFirstNamedChildNode(fragment, "HTML");
+      if (htmlNode != null) {
+        document.appendChild(htmlNode);
+      } else {
+        Node root = document.appendChild(document.createElement("HTML"));
+        root.appendChild(fragment);
+      }
+      HtmlSerializer.attach(document, new Serializer(), source);
+      return document;
+    } catch (IOException ioe) {
+      return null;
+    }
+  }
+
+  /**
+   * Tests whether a tag belongs to the element set that is materialized in the DOM.
+   * The name is lower-cased with a fixed locale so that the match does not depend on the
+   * JVM default locale (a Turkish default would otherwise map 'I' to a dotless 'i').
+   */
+  private static boolean isStructural(QName qName) {
+    return elements.contains(qName.rawname.toLowerCase(java.util.Locale.ENGLISH));
+  }
+
+
+  /**
+   * Handler for XNI events from Neko. Elements in the structural set become DOM
+   * elements; everything between them is copied out of the original content as a
+   * single text node, using the character offsets reported with each event.
+   */
+  private class DocumentHandler implements XMLDocumentHandler {
+    // Character offset recorded for each line of the content, see the constructor
+    private final List<Integer> lines;
+    // Current insertion point is the top of the stack
+    private final Stack<Node> elementStack = new Stack<Node>();
+    // {begin, end} offsets of the first pending unstructured event; [0] == -1 means none
+    private final int[] startCharOffsets;
+    // {begin, end} offsets of the latest pending unstructured event; [0] == -1 means none
+    private final int[] lastCharOffsets;
+    private DocumentFragment documentFragment;
+    private Document document;
+    private final String content;
+
+    public DocumentHandler(String content) {
+      this.content = content;
+      // Populate lines
+      lines = Lists.newArrayListWithExpectedSize(content.length() / 30);
+      lines.add(0);
+      for (int i = 0; i < content.length(); i++) {
+        char c = content.charAt(i);
+        if (c != '\n' && c != '\r') {
+          continue;
+        }
+        // Treat "\r\n" as a single line terminator
+        if (c == '\r' && i + 1 < content.length() && content.charAt(i + 1) == '\n') {
+          i++;
+        }
+        // NOTE(review): the value recorded is the index of the terminator character
+        // itself, not of the first character of the following line - confirm this
+        // agrees with the line/column convention reported by HTMLEventInfo.
+        lines.add(i);
+      }
+      startCharOffsets = new int[]{-1, -1};
+      lastCharOffsets = new int[]{-1, -1};
+    }
+
+    public DocumentFragment getFragment() {
+      return documentFragment;
+    }
+
+    public Document getDocument() {
+      return document;
+    }
+
+    private HTMLEventInfo getEventInfo(Augmentations augmentations) {
+      return (HTMLEventInfo) augmentations.getItem(AUGMENTATIONS);
+    }
+
+    /**
+     * Returns the slice of the original content covered by the pending events: from the
+     * beginning of the first one to the end of the last one, or to the end of the first
+     * one when no later event was recorded. Empty when nothing is pending.
+     */
+    private String getUnstructuredString(int[] start, int[] end) {
+      if (start[0] == -1) {
+        return "";
+      }
+      int charEnd = (end[0] == -1) ? start[1] : end[1];
+      return content.substring(start[0], charEnd);
+    }
+
+    /** Converts the event's begin/end line and column numbers into character offsets. */
+    private void recordStartEnd(HTMLEventInfo info, int[] offsets) {
+      offsets[0] = lines.get(info.getBeginLineNumber() - 1) + info.getBeginColumnNumber() - 1;
+      offsets[1] = lines.get(info.getEndLineNumber() - 1) + info.getEndColumnNumber() - 1;
+    }
+
+    /** Appends the pending unstructured text to the current node and resets the offsets. */
+    private void flushUnstructured() {
+      String unstructured = getUnstructuredString(startCharOffsets, lastCharOffsets);
+      elementStack.peek().appendChild(document.createTextNode(unstructured));
+      startCharOffsets[0] = -1;
+      lastCharOffsets[0] = -1;
+    }
+
+    /**
+     * Common entry point for every XNI callback.
+     *
+     * @param shouldClose true when a structural element boundary was reached and the
+     *        pending unstructured text must be emitted as a text node
+     * @param content text to append after the flush for synthesized closing events;
+     *        may be null
+     * @param augs augmentations carrying the HTMLEventInfo of the event
+     */
+    public void handleEvent(boolean shouldClose, Object content, Augmentations augs) {
+      HTMLEventInfo info = getEventInfo(augs);
+      if (info.isSynthesized()) {
+        // NOTE! Remove this to balance synthesized close tags
+        if (!shouldClose) {
+          return;
+        }
+        // Must close with existing content
+        flushUnstructured();
+        if (content != null) {
+          elementStack.peek().appendChild(document.createTextNode(content.toString()));
+        }
+      } else if (shouldClose) {
+        flushUnstructured();
+      } else if (startCharOffsets[0] == -1) {
+        // First unstructured event since the last flush
+        recordStartEnd(info, startCharOffsets);
+        lastCharOffsets[0] = -1;
+      } else {
+        recordStartEnd(info, lastCharOffsets);
+      }
+    }
+
+    /**
+     * Debugging aid: prints the event together with the source text it covers. Enable
+     * by uncommenting the trace calls in the callbacks below.
+     */
+    private void trace(String prefix, Augmentations augmentations) {
+      HTMLEventInfo info = getEventInfo(augmentations);
+      String text = "";
+      if (!info.isSynthesized()) {
+        int[] startEnd = new int[2];
+        recordStartEnd(info, startEnd);
+        text = content.substring(startEnd[0], startEnd[1]);
+        // Literal replace: with replaceAll the replacement "\\n" is read as an escaped
+        // 'n', which would print the letter n instead of a visible "\n"
+        text = text.replace("\n", "\\n");
+        text = text.replace("\r", "\\r");
+      }
+      System.out.println("Event " + prefix + info.toString() + " -> " + text);
+    }
+
+    /** Starts a fresh document and fragment, discarding anything pushed so far. */
+    private void resetDocument(Document newDocument) {
+      document = newDocument;
+      elementStack.clear();
+      documentFragment = document.createDocumentFragment();
+      elementStack.push(documentFragment);
+    }
+
+    /**
+     * Creates a DOM element for the tag, copies its attributes and appends it to the
+     * node currently on top of the stack.
+     */
+    private Element appendElement(QName qName, XMLAttributes xmlAttributes) {
+      Element element = document.createElement(qName.rawname);
+      for (int i = 0; i < xmlAttributes.getLength(); i++) {
+        element.setAttribute(xmlAttributes.getLocalName(i), xmlAttributes.getValue(i));
+      }
+      elementStack.peek().appendChild(element);
+      return element;
+    }
+
+    public void startDocument(XMLLocator xmlLocator, String encoding,
+        NamespaceContext namespaceContext, Augmentations augs) throws XNIException {
+      resetDocument(documentFactory.createDocument(null, null, null));
+      //trace("StartDoc", augs);
+    }
+
+    public void xmlDecl(String version, String encoding, String standalone, Augmentations augs)
+        throws XNIException {
+      //trace("xmlDecl", augs);
+      handleEvent(false, null, augs);
+    }
+
+    public void doctypeDecl(String rootElement, String publicId, String systemId,
+        Augmentations augs) throws XNIException {
+      // Recreate the document with the specific doctype
+      resetDocument(documentFactory.createDocument(null, null,
+          documentFactory.createDocumentType(rootElement, publicId, systemId)));
+      //trace("docTypeDecl", augs);
+      handleEvent(false, null, augs);
+    }
+
+    public void comment(XMLString xmlString, Augmentations augs) throws XNIException {
+      //trace("comment", augs);
+      handleEvent(false, xmlString, augs);
+    }
+
+    public void processingInstruction(String s, XMLString xmlString, Augmentations augs)
+        throws XNIException {
+      //trace("PI", augs);
+      handleEvent(false, xmlString, augs);
+    }
+
+    public void startElement(QName qName, XMLAttributes xmlAttributes, Augmentations augs)
+        throws XNIException {
+      //trace("StartElem(" + qName.rawname + ")", augs);
+      if (isStructural(qName)) {
+        handleEvent(true, null, augs);
+        // The new element becomes the insertion point until its end tag is seen
+        elementStack.push(appendElement(qName, xmlAttributes));
+      } else {
+        handleEvent(false, null, augs);
+      }
+    }
+
+    public void emptyElement(QName qName, XMLAttributes xmlAttributes, Augmentations augs)
+        throws XNIException {
+      //trace("EmptyElemm(" + qName.rawname + ")", augs);
+      if (isStructural(qName)) {
+        handleEvent(true, null, augs);
+        // Empty elements have no children, so they are appended but never pushed
+        appendElement(qName, xmlAttributes);
+      } else {
+        handleEvent(false, null, augs);
+      }
+    }
+
+    public void startGeneralEntity(String s, XMLResourceIdentifier xmlResourceIdentifier,
+        String s1, Augmentations augs) throws XNIException {
+      //trace("StartEntity(" + s + ")", augs);
+      handleEvent(false, null, augs);
+    }
+
+    public void textDecl(String s, String s1, Augmentations augs) throws XNIException {
+      //trace("Textdecl(" + s + ")", augs);
+      handleEvent(false, null, augs);
+    }
+
+    public void endGeneralEntity(String s, Augmentations augs) throws XNIException {
+      //trace("EndEntity(" + s + ")", augs);
+      handleEvent(false, null, augs);
+    }
+
+    public void characters(XMLString xmlString, Augmentations augs) throws XNIException {
+      handleEvent(false, xmlString, augs);
+    }
+
+    public void ignorableWhitespace(XMLString xmlString, Augmentations augs)
+        throws XNIException {
+      //trace("Whitespace", augs);
+      handleEvent(false, xmlString, augs);
+    }
+
+    public void endElement(QName qName, Augmentations augs) throws XNIException {
+      //trace("EndElem(" + qName.rawname + ")", augs);
+      if (isStructural(qName)) {
+        handleEvent(true, null, augs);
+        // FIXME - Balancer
+        elementStack.pop();
+      } else {
+        handleEvent(false, "</" + qName.rawname + ">", augs);
+      }
+    }
+
+    public void startCDATA(Augmentations augs) throws XNIException {
+      //trace("startCData", augs);
+      handleEvent(false, null, augs);
+    }
+
+    public void endCDATA(Augmentations augs) throws XNIException {
+      //trace("endCData", augs);
+      handleEvent(false, null, augs);
+    }
+
+    public void endDocument(Augmentations augs) throws XNIException {
+      //trace("endDoc", augs);
+      handleEvent(false, null, augs);
+    }
+
+    public void setDocumentSource(XMLDocumentSource xmlDocumentSource) {
+    }
+
+    public XMLDocumentSource getDocumentSource() {
+      return null;
+    }
+  }
+
+  /**
+   * Serializer for the simplified DOM. Text nodes hold raw markup copied from the
+   * source, so they are printed as-is instead of being escaped.
+   */
+  static class Serializer extends HtmlSerializer {
+
+    static final OutputFormat outputFormat = new OutputFormat();
+    static {
+      outputFormat.setPreserveSpace(true);
+      outputFormat.setPreserveEmptyAttributes(false);
+    }
+
+    /**
+     * @param doc document to serialize
+     * @return the serialized text, or null if serialization raises an IOException
+     */
+    public String serializeImpl(Document doc) {
+      StringWriter sw = createWriter(doc);
+      HTMLSerializer serializer = new HTMLSerializer(sw, outputFormat) {
+        // Overridden to prevent escaping of literal text
+        @Override
+        protected void characters(String s) throws IOException {
+          this.content();
+          this._printer.printText(s);
+        }
+      };
+      try {
+        serializer.serialize(doc);
+        return sw.toString();
+      } catch (IOException ioe) {
+        return null;
+      }
+    }
+  }
+}
Added: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/test.html
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/test.html?rev=710176&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/test.html (added)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/test.html Mon Nov 3 13:41:23 2008
@@ -0,0 +1,28 @@
+<html><body><select><option>content<option></body></html>
+
+Balanced
+Event StartDoc1:1:1:1 ->
+Event StartElem(html)1:1:1:7 -> <html>
+Event StartElem(body)1:7:1:13 -> <body>
+Event StartElem(select)1:13:1:21 -> <select>
+Event StartElem(option)1:21:1:29 -> <option>
+Event Chars(content)1:29:1:36 -> content
+Event EndElem(option)synthesized ->
+Event StartElem(option)1:36:1:44 -> <option>
+Event EndElem(option)synthesized ->
+Event EndElem(select)synthesized ->
+Event EndElem(body)synthesized ->
+Event EndElem(html)synthesized ->
+Event endDoc1:58:1:58 ->
+
+Unbalanced
+Event StartDoc1:1:1:1 ->
+Event StartElem(html)1:1:1:7 -> <html>
+Event StartElem(body)1:7:1:13 -> <body>
+Event StartElem(select)1:13:1:21 -> <select>
+Event StartElem(option)1:21:1:29 -> <option>
+Event Chars(content)1:29:1:36 -> content
+Event StartElem(option)1:36:1:44 -> <option>
+Event EndElem(body)1:44:1:51 -> </body>
+Event EndElem(html)1:51:1:58 -> </html>
+Event endDoc1:58:1:58 ->
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/HtmlContentRewriter.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/HtmlContentRewriter.java?rev=710176&r1=710175&r2=710176&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/HtmlContentRewriter.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/HtmlContentRewriter.java Mon Nov 3 13:41:23 2008
@@ -18,14 +18,21 @@
*/
package org.apache.shindig.gadgets.rewrite;
+import com.google.common.collect.Lists;
import org.apache.shindig.common.uri.Uri;
import org.apache.shindig.gadgets.Gadget;
import org.apache.shindig.gadgets.http.HttpRequest;
import org.apache.shindig.gadgets.http.HttpResponse;
import org.apache.shindig.gadgets.spec.View;
import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.w3c.dom.traversal.DocumentTraversal;
+import org.w3c.dom.traversal.NodeFilter;
+import org.w3c.dom.traversal.NodeIterator;
import java.net.URI;
+import java.util.List;
+import java.util.Set;
/**
* Simple helper base class for ContentRewriters that manipulate an
@@ -70,4 +77,22 @@
return null;
}
+ public static List<Node> getElementsByTagNameCaseInsensitive(Document doc,
+ final Set<String> lowerCaseNames) {
+ final List<Node> result = Lists.newArrayList();
+ NodeIterator nodeIterator = ((DocumentTraversal) doc)
+ .createNodeIterator(doc, NodeFilter.SHOW_ELEMENT,
+ new NodeFilter() {
+ public short acceptNode(Node n) {
+ if (lowerCaseNames.contains(n.getNodeName().toLowerCase())) {
+ return NodeFilter.FILTER_ACCEPT;
+ }
+ return NodeFilter.FILTER_REJECT;
+ }
+ }, false);
+ for (Node n = nodeIterator.nextNode(); n != null ; n = nodeIterator.nextNode()) {
+ result.add(n);
+ }
+ return result;
+ }
}
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriter.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriter.java?rev=710176&r1=710175&r2=710176&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriter.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriter.java Mon Nov 3 13:41:23 2008
@@ -19,6 +19,7 @@
package org.apache.shindig.gadgets.rewrite;
import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
import org.apache.shindig.common.uri.Uri;
import org.apache.shindig.common.util.Utf8UrlCoder;
import org.apache.shindig.gadgets.Gadget;
@@ -28,11 +29,11 @@
import org.apache.shindig.gadgets.spec.GadgetSpec;
import org.apache.shindig.gadgets.spec.View;
import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
@@ -43,6 +44,7 @@
private final String concatUrlBase;
private static final String DEFAULT_CONCAT_URL_BASE = "/gadgets/concat?";
+ private static final HashSet<String> TAG_NAMES = Sets.newHashSet("script");
public JsTagConcatContentRewriter(ContentRewriterFeature.Factory rewriterFeatureFactory,
String concatUrlBase) {
@@ -68,14 +70,9 @@
}
// Get all the script tags
- NodeList scriptTags = content.getDocument().getElementsByTagName("SCRIPT");
+ List<Node> nodeList =
+ HtmlContentRewriter.getElementsByTagNameCaseInsensitive(content.getDocument(), TAG_NAMES);
- // Copy NodeList as it respects changes to the underlying document which is a
- // behavior we dont want when removing nodes
- List<Node> nodeList = Lists.newArrayListWithExpectedSize(scriptTags.getLength());
- for (int i = 0; i < scriptTags.getLength(); i++) {
- nodeList.add(scriptTags.item(i));
- }
String concatBase = getJsConcatBase(gadget.getSpec(), rewriterFeature);
Uri contentBase = gadget.getSpec().getUrl();
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriter.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriter.java?rev=710176&r1=710175&r2=710176&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriter.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriter.java Mon Nov 3 13:41:23 2008
@@ -55,7 +55,7 @@
.createNodeIterator(root, NodeFilter.SHOW_ELEMENT,
new NodeFilter() {
public short acceptNode(Node n) {
- Set<String> stringSet = tagAttributeTargets.get(n.getNodeName());
+ Set<String> stringSet = tagAttributeTargets.get(n.getNodeName().toUpperCase());
if (stringSet != null) {
NamedNodeMap attributes = n.getAttributes();
// TODO - Check is NodeMap lookup is case insensitive, if so use that
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java?rev=710176&r1=710175&r2=710176&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java Mon Nov 3 13:41:23 2008
@@ -19,13 +19,9 @@
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
-import org.apache.xml.serialize.HTMLSerializer;
-import org.apache.xml.serialize.OutputFormat;
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
import org.w3c.dom.Document;
-import java.io.IOException;
-import java.io.StringWriter;
-
/**
* Object that maintains a String representation of arbitrary contents
* and a consistent view of those contents as an HTML parse tree.
@@ -70,14 +66,9 @@
// per rendering cycle: all rewriters (or other manipulators)
// operating on the parse tree should happen together.
contentParseId = parseEditId;
- StringWriter sw = new StringWriter((content.length() * 10) / 9);
- try {
- new HTMLSerializer(sw, new OutputFormat(document)).serialize(document);
- } catch (IOException e) {
- // Never happens.
- }
- content = sw.toString();
+ // Parser will have bound an HTML serializer to the document
+ content = HtmlSerializer.serialize(document);
}
return content;
}
Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriter.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriter.java?rev=710176&r1=710175&r2=710176&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriter.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriter.java Mon Nov 3 13:41:23 2008
@@ -18,16 +18,18 @@
*/
package org.apache.shindig.gadgets.rewrite;
+import com.google.common.collect.Sets;
import org.apache.shindig.common.uri.Uri;
+import org.apache.shindig.common.xml.XmlUtil;
import org.apache.shindig.gadgets.Gadget;
import org.apache.shindig.gadgets.http.HttpRequest;
import org.apache.shindig.gadgets.http.HttpResponse;
import org.apache.shindig.gadgets.spec.View;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
import java.net.URI;
+import java.util.List;
public class StyleLinksContentRewriter implements ContentRewriter {
// TODO: consider providing helper base class for node-visitor content rewriters
@@ -74,20 +76,18 @@
}
boolean mutated = false;
- Node head;
- NodeList headTags = doc.getElementsByTagName("HEAD");
- if (headTags.getLength() == 0) {
+ // TODO This should move into parsers
+ Node head = XmlUtil.getFirstNamedChildNode(doc.getDocumentElement(), "head");
+ if (head == null) {
mutated = true;
- head = doc.getDocumentElement().appendChild(doc.createElement("HEAD"));
- } else {
- head = headTags.item(0);
+ head = doc.getDocumentElement().appendChild(doc.createElement("head"));
}
// Move all style tags into head
// TODO Convert all @imports into a concatenated link tag
- NodeList styleTags = doc.getElementsByTagName("STYLE");
- for (int i = 0; i < styleTags.getLength(); i++) {
- Node styleNode = styleTags.item(i);
+ List<Node> styleTags = HtmlContentRewriter.getElementsByTagNameCaseInsensitive(doc,
+ Sets.newHashSet("style"));
+ for (Node styleNode : styleTags) {
mutated = true;
if (!styleNode.getParentNode().getNodeName().equalsIgnoreCase("HEAD")) {
styleNode.getParentNode().removeChild(styleNode);
Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java?rev=710176&r1=710175&r2=710176&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java Mon Nov 3 13:41:23 2008
@@ -31,10 +31,10 @@
public class HtmlParserTest extends TestCase {
private final GadgetHtmlParser cajaParser = new CajaHtmlParser(
- new ParseModule.HTMLDocumentProvider());
+ new ParseModule.DOMImplementationProvider().get());
private final GadgetHtmlParser nekoParser = new NekoHtmlParser(
- new ParseModule.HTMLDocumentProvider());
+ new ParseModule.DOMImplementationProvider().get());
public void testParseSimpleString() throws Exception {
parseSimpleString(cajaParser);
@@ -57,7 +57,7 @@
parseTagWithStringContents(cajaParser);
}
- public void parseTagWithStringContents(GadgetHtmlParser htmlParser) throws Exception {
+ void parseTagWithStringContents(GadgetHtmlParser htmlParser) throws Exception {
Document doc = htmlParser.parseDom("<span>content</span>");
Node node = doc.getDocumentElement().getFirstChild();
Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java?rev=710176&r1=710175&r2=710176&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java Mon Nov 3 13:41:23 2008
@@ -21,28 +21,40 @@
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.parse.caja.CajaHtmlParser;
import org.apache.shindig.gadgets.parse.nekohtml.NekoHtmlParser;
-import org.apache.xml.serialize.HTMLSerializer;
-import org.cyberneko.html.parsers.SAXParser;
-import org.w3c.dom.Node;
+import org.apache.shindig.gadgets.parse.nekohtml.NekoSimplifiedHtmlParser;
+import org.w3c.dom.DOMImplementation;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
-import org.w3c.dom.ls.*;
-import java.io.*;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.StringWriter;
/**
* Benchmarks for HTML parsing and serialization
- *
- * NOTE - Uncomment DOM4J bits to test that.
*/
public class ParseTreeSerializerBenchmark {
private DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
private int numRuns;
private String content;
- private GadgetHtmlParser cajaParser = new CajaHtmlParser(new ParseModule.HTMLDocumentProvider());
- private GadgetHtmlParser nekoParser = new NekoHtmlParser(new ParseModule.HTMLDocumentProvider());
+
+ private GadgetHtmlParser cajaParser = new CajaHtmlParser(
+ DOCUMENT_PROVIDER);
+
+ private GadgetHtmlParser nekoParser = new NekoHtmlParser(
+ DOCUMENT_PROVIDER);
+
+ private GadgetHtmlParser nekoSimpleParser = new NekoSimplifiedHtmlParser(
+ DOCUMENT_PROVIDER);
+
private boolean warmup;
- private SAXParser saxParser;
- //private SAXReader saxReader;
+
+ private static final DOMImplementation DOCUMENT_PROVIDER =
+ new ParseModule.DOMImplementationProvider().get();
private ParseTreeSerializerBenchmark(String file, int numRuns) throws Exception {
File inputFile = new File(file);
@@ -52,23 +64,18 @@
}
content = new String(IOUtils.toByteArray(new FileInputStream(file)));
- saxParser = new SAXParser();
- //saxParser.setFeature("http://cyberneko.org/html/features/scanner/script/strip-comment-delims",true);
- saxParser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs",true);
- //saxReader = new SAXReader(saxParser);
- //saxReader.setValidation(false);
-
- this.numRuns = 50;
+ this.numRuns = 10;
warmup = true;
- runCaja();
+ //runCaja();
runNeko();
- runLS();
+ runNekoSimple();
+ //Sleep to let JIT kick in
Thread.sleep(10000L);
- this.numRuns = 300; //numRuns;
+ this.numRuns = 50; //numRuns;
warmup = false;
- runCaja();
+ //runCaja();
runNeko();
- runLS();
+ runNekoSimple();
}
private void runCaja() throws Exception {
@@ -76,19 +83,19 @@
// Some warmup runs with wait. Enough iterations to trigger the JIT
// Wait to allow it to swap execution paths etc...
timeParseDom(cajaParser);
+ timeParseDomSerialize(cajaParser);
}
private void runNeko() throws Exception {
output("Neko-----------------");
timeParseDom(nekoParser);
- //timeParseDom4J();
- //timeParseDom4JSerialize();
timeParseDomSerialize(nekoParser);
}
- private void runLS() throws Exception {
- output("LOAD/STORE-----------------");
- runLSSerializationTiming(nekoParser);
+ private void runNekoSimple() throws Exception {
+ output("NekoSimple-----------------");
+ timeParseDom(nekoSimpleParser);
+ timeParseDomSerialize(nekoSimpleParser);
}
private void output(String string) {
@@ -108,119 +115,42 @@
((double)parseMillis)/numRuns + "ms/run]");
}
- /*
- private void timeParseDom4J() throws GadgetException {
+ private void timeParseDomSerialize(GadgetHtmlParser parser) throws GadgetException {
+ org.w3c.dom.Document document = parser.parseDom(content);
try {
long parseStart = System.currentTimeMillis();
for (int i = 0; i < numRuns; ++i) {
- saxReader.read(new InputSource(new StringReader(content)));
+ HtmlSerializer.serialize(document);
}
long parseMillis = System.currentTimeMillis() - parseStart;
- output("Parsing DOM4J [" + parseMillis + " ms total: " +
- ((double)parseMillis)/numRuns + "ms/run]");
+ output("Serializing [" + parseMillis + " ms total: " +
+ ((double) parseMillis) / numRuns + "ms/run]");
} catch (Exception e) {
throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e);
}
- }
- */
- /*
- private void timeParseDom4JSerialize() throws GadgetException {
try {
- Document document = saxReader.read(new InputSource(new StringReader(content)));
- OutputFormat format = OutputFormat.createCompactFormat();
- format.setXHTML(false);
+ // Create an "identity" transformer - copies input to output
+ Transformer t = TransformerFactory.newInstance().newTransformer();
+ t.setOutputProperty(OutputKeys.METHOD, "html");
long parseStart = System.currentTimeMillis();
for (int i = 0; i < numRuns; ++i) {
StringWriter sw = new StringWriter((content.length() * 11) / 10);
- HTMLWriter htmlWriter = new HTMLWriter(sw, format) {
- protected void writeEntity(Entity entity) throws IOException {
- writer.write("&");
- writer.write(entity.getName());
- writer.write(";");
- lastOutputNodeType = org.dom4j.Node.ENTITY_REFERENCE_NODE;
- }
- };
- //htmlWriter.setResolveEntityRefs(false);
- htmlWriter.setEscapeText(false);
- htmlWriter.write(document);
+ t.transform(new DOMSource(document), new StreamResult(sw));
+ sw.toString();
}
long parseMillis = System.currentTimeMillis() - parseStart;
- output("Serializing DOM4J [" + parseMillis + " ms total: " +
- ((double)parseMillis)/numRuns + "ms/run]");
- } catch (Exception e) {
- throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e);
- }
-
- }
- */
-
- private void timeParseDomSerialize(GadgetHtmlParser parser) throws GadgetException {
- org.w3c.dom.Document document = parser.parseDom(content);
-
- try {
- long parseStart = System.currentTimeMillis();
- for (int i = 0; i < numRuns; ++i) {
- StringWriter sw = new StringWriter((content.length() * 11) / 10);
- HTMLSerializer xercesSerializer = new HTMLSerializer(sw, new org.apache.xml.serialize.OutputFormat());
- xercesSerializer.serialize(document);
- }
- long parseMillis = System.currentTimeMillis() - parseStart;
-
- output("Serializing Xerces [" + parseMillis + " ms total: " +
+ output("Serializing DOM Transformer [" + parseMillis + " ms total: " +
((double) parseMillis) / numRuns + "ms/run]");
+
} catch (Exception e) {
throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e);
}
}
- /*
- private void timeParseOld(GadgetHtmlParser parser) throws GadgetException {
- long parseStart = System.currentTimeMillis();
- for (int i = 0; i < numRuns; ++i) {
- parser.parse(content);
- }
- long parseMillis = System.currentTimeMillis() - parseStart;
-
- output("Parsing [" + parseMillis + " ms total: " +
- ((double)parseMillis)/numRuns + "ms/run]");
- }
- */
-
- private void runLSSerializationTiming(GadgetHtmlParser parser) throws Exception {
- Node n = parser.parseDom(content);
- DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS");
- ByteArrayOutputStream baos;
- baos = new ByteArrayOutputStream(content.length() * 2);
- LSSerializer writer = impl.createLSSerializer();
- LSParser lsParser = impl.createLSParser(LSParser.ACTION_APPEND_AS_CHILDREN, null);
-
- long serTime = 0, deserTime = 0;
- for (int i = 0; i < numRuns; ++i) {
- long serStart = System.currentTimeMillis();
- LSOutput output = impl.createLSOutput();
- baos.reset();
- output.setByteStream(baos);
- writer.write(n, output);
- serTime += (System.currentTimeMillis() - serStart);
- LSInput input = impl.createLSInput();
- input.setByteStream(new ByteArrayInputStream(baos.toByteArray()));
- long deserStart = System.currentTimeMillis();
- //XmlUtil.parse(new String(baos.toByteArray()));
- lsParser.parse(input);
- deserTime += (System.currentTimeMillis() - deserStart);
- //checkListEquality(nodes, outs);
- }
-
- output("LS Serialization [" + serTime + " ms total: "
- + ((double)serTime)/numRuns + "ms/run]");
- output("LS Deserialization [" + deserTime + " ms total: "
- + ((double)deserTime)/numRuns + "ms/run]");
- }
-
public static void main(String[] args) {
// Test can be run as standalone program to test out serialization and parsing
// performance numbers, using Caja as a parser.
Added: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParserTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParserTest.java?rev=710176&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParserTest.java (added)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/nekohtml/NekoSimplifiedHtmlParserTest.java Mon Nov 3 13:41:23 2008
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package org.apache.shindig.gadgets.parse.nekohtml;
+
+import junit.framework.TestCase;
+import org.apache.shindig.gadgets.parse.ParseModule;
+import org.apache.xml.serialize.HTMLSerializer;
+import org.apache.xml.serialize.OutputFormat;
+import org.w3c.dom.Document;
+
+import java.io.IOException;
+import java.io.StringWriter;
+
+/**
+ * Test behavior of simplified HTML parser
+ */
+public class NekoSimplifiedHtmlParserTest extends TestCase {
+
+  // Each case parses deliberately unbalanced markup and compares the
+  // re-serialized document, ignoring case, with the expected string.
+
+  public void testUnbalanced() throws Exception {
+    parseAndCompareBalanced("<html><body><center>content</body></html>",
+        "<html><body><center>content</body></html>");
+  }
+
+  public void testUnbalanced2() throws Exception {
+    parseAndCompareBalanced("<html><body><img>content<img>content</body></html>",
+        "<HTML><body><IMG>content<IMG>content</body></HTML>");
+  }
+
+  public void testUnbalanced3() throws Exception {
+    parseAndCompareBalanced("<html><body><select><option>content<option></body></html>",
+        "<html><body><select><option>content<option></body></html>");
+  }
+
+  public void testUnbalanced4() throws Exception {
+    parseAndCompareBalanced("<html><body>Something awful</html>",
+        "<HTML><body>Something awful</body></HTML>");
+  }
+
+  public void testUnbalanced5() throws Exception {
+    parseAndCompareBalanced("<html><body><br />content<br></html>",
+        "<HTML><body><br />content<br></body></HTML>");
+  }
+
+  /**
+   * Parses the content with a fresh NekoSimplifiedHtmlParser, serializes the
+   * resulting document and asserts that it equals the expected markup once
+   * both sides are lower-cased.
+   *
+   * @param content HTML to parse
+   * @param expected markup the serialized document should equal, ignoring case
+   */
+  private void parseAndCompareBalanced(String content, String expected) throws Exception {
+    NekoSimplifiedHtmlParser builder = new NekoSimplifiedHtmlParser(
+        new ParseModule.DOMImplementationProvider().get());
+    Document document = builder.parseDom(content);
+    StringWriter sw = new StringWriter();
+    OutputFormat outputFormat = new OutputFormat();
+    outputFormat.setPreserveSpace(true);
+    outputFormat.setOmitDocumentType(true);
+    HTMLSerializer serializer = new HTMLSerializer(sw, outputFormat) {
+      // Hands text straight to the printer.
+      // NOTE(review): presumably this is to avoid the serializer's own
+      // escaping of character data - confirm against the Xerces sources.
+      protected void characters(String s) throws IOException {
+        this.content();
+        this._printer.printText(s);
+      }
+    };
+    serializer.serialize(document);
+
+    // JUnit takes (expected, actual); keeping that order makes the failure
+    // message label the two values correctly.
+    assertEquals(expected.toLowerCase(), sw.toString().toLowerCase());
+  }
+}
Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriterTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriterTest.java?rev=710176&r1=710175&r2=710176&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriterTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriterTest.java Mon Nov 3 13:41:23 2008
@@ -49,9 +49,9 @@
}
public void testJSMergePreserveNoExternal() throws Exception {
- String s = "<SCRIPT>\n"
+ String s = "<script>\n"
+ "doSomething\n"
- + "</SCRIPT>";
+ + "</script>";
Document document = htmlParser.parseDom(s);
String rewritten = rewriteHelper(rewriter, s, document);
@@ -67,81 +67,81 @@
}
public void testJSMergePreserveWithComment() throws Exception {
- String s = "<SCRIPT>" +
+ String s = "<script>" +
"<!--\ndoSomething\n-->" +
- "</SCRIPT>";
+ "</script>";
Document document = htmlParser.parseDom(s);
String rewritten = rewriteHelper(rewriter, s, document);
assertEquals(rewritten, s);
}
public void testJSMergeSingleScriptReWrite() throws Exception {
- String s = "<SCRIPT src=\"http://a.b.com/1.js\"></SCRIPT>";
- String expected = "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js\"></SCRIPT>";
+ String s = "<script src=\"http://a.b.com/1.js\"></script>";
+ String expected = "<script src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js\"></script>";
Document document = htmlParser.parseDom(s);
String rewritten = rewriteHelper(rewriter, s, document);
assertEquals(rewritten, expected);
}
public void testJSMergeTwoScriptReWriteWithWhitespace() throws Exception {
- String s = "<SCRIPT src=\"http://a.b.com/1.js\"></SCRIPT>"
- + "<SCRIPT src=\"http://a.b.com/2.js\"></SCRIPT>";
+ String s = "<script src=\"http://a.b.com/1.js\"></script>"
+ + "<script src=\"http://a.b.com/2.js\"></script>";
String expected
- = "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js&2=http%3A%2F%2Fa.b.com%2F2.js\"></SCRIPT>";
+ = "<script src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js&2=http%3A%2F%2Fa.b.com%2F2.js\"></script>";
Document document = htmlParser.parseDom(s);
String rewritten = rewriteHelper(rewriter, s, document);
assertEquals(rewritten, expected);
}
public void testJSMergeLeadAndTrailingScriptReWrite() throws Exception {
- String s = "<SCRIPT>\n"
+ String s = "<script>\n"
+ "doSomething\n"
- + "</SCRIPT>"
- + "<SCRIPT src=\"http://a.b.com/1.js\"></SCRIPT>"
- + "<SCRIPT src=\"http://a.b.com/2.js\"></SCRIPT>"
- + "<SCRIPT>\n"
+ + "</script>"
+ + "<script src=\"http://a.b.com/1.js\"></script>"
+ + "<script src=\"http://a.b.com/2.js\"></script>"
+ + "<script>\n"
+ "doSomething\n"
- + "</SCRIPT>";
- String expected = "<SCRIPT>\n"
+ + "</script>";
+ String expected = "<script>\n"
+ "doSomething\n"
- + "</SCRIPT>"
- + "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js&2=http%3A%2F%2Fa.b.com%2F2.js\"></SCRIPT>"
- + "<SCRIPT>\n"
+ + "</script>"
+ + "<script src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js&2=http%3A%2F%2Fa.b.com%2F2.js\"></script>"
+ + "<script>\n"
+ "doSomething\n"
- + "</SCRIPT>";
+ + "</script>";
Document document = htmlParser.parseDom(s);
String rewritten = rewriteHelper(rewriter, s, document);
assertEquals(rewritten, expected);
}
public void testJSMergeInterspersed() throws Exception {
- String s = "<SCRIPT src=\"http://a.b.com/1.js\"></SCRIPT>"
- + "<SCRIPT src=\"http://a.b.com/2.js\"></SCRIPT>"
- + "<SCRIPT><!-- doSomething --></SCRIPT>"
- + "<SCRIPT src=\"http://a.b.com/3.js\"></SCRIPT>"
- + "<SCRIPT src=\"http://a.b.com/4.js\"></SCRIPT>";
+ String s = "<script src=\"http://a.b.com/1.js\"></script>"
+ + "<script src=\"http://a.b.com/2.js\"></script>"
+ + "<script><!-- doSomething --></script>"
+ + "<script src=\"http://a.b.com/3.js\"></script>"
+ + "<script src=\"http://a.b.com/4.js\"></script>";
String expected =
- "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js&2=http%3A%2F%2Fa.b.com%2F2.js\"></SCRIPT>" +
- "<SCRIPT><!-- doSomething --></SCRIPT>" +
- "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F3.js&2=http%3A%2F%2Fa.b.com%2F4.js\"></SCRIPT>";
+ "<script src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js&2=http%3A%2F%2Fa.b.com%2F2.js\"></script>" +
+ "<script><!-- doSomething --></script>" +
+ "<script src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F3.js&2=http%3A%2F%2Fa.b.com%2F4.js\"></script>";
Document document = htmlParser.parseDom(s);
String rewritten = rewriteHelper(rewriter, s, document);
assertEquals(expected, rewritten);
}
public void testJSMergeDerelativizeHostRelative() throws Exception {
- String s = "<SCRIPT src=\"/1.js\"></SCRIPT>";
+ String s = "<script src=\"/1.js\"></script>";
String expected
- = "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fgadget.org%2F1.js\"></SCRIPT>";
+ = "<script src=\"" + concatBase + "1=http%3A%2F%2Fgadget.org%2F1.js\"></script>";
Document document = htmlParser.parseDom(s);
String rewritten = rewriteHelper(rewriter, s, document);
assertEquals(rewritten, expected);
}
public void testJSMergeDerelativizePathRelative() throws Exception {
- String s = "<SCRIPT src=\"1.js\"></SCRIPT>";
+ String s = "<script src=\"1.js\"></script>";
String expected
- = "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fgadget.org%2Fdir%2F1.js\"></SCRIPT>";
+ = "<script src=\"" + concatBase + "1=http%3A%2F%2Fgadget.org%2Fdir%2F1.js\"></script>";
Document document = htmlParser.parseDom(s);
String rewritten = rewriteHelper(rewriter, s, document);
assertEquals(rewritten, expected);
Added: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LexerVsDomRewriteBenchmark.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LexerVsDomRewriteBenchmark.java?rev=710176&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LexerVsDomRewriteBenchmark.java (added)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LexerVsDomRewriteBenchmark.java Mon Nov 3 13:41:23 2008
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.shindig.gadgets.rewrite;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
+import org.apache.shindig.gadgets.parse.ParseModule;
+import org.apache.shindig.gadgets.parse.caja.CajaHtmlParser;
+import org.apache.shindig.gadgets.parse.nekohtml.NekoHtmlParser;
+import org.apache.shindig.gadgets.parse.nekohtml.NekoSimplifiedHtmlParser;
+import org.apache.shindig.gadgets.rewrite.lexer.HtmlRewriter;
+import org.apache.shindig.gadgets.rewrite.lexer.HtmlTagTransformer;
+import org.apache.shindig.gadgets.rewrite.lexer.LinkingTagRewriter;
+import org.w3c.dom.Document;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.net.URI;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Compare performance of lexer rewriter and dom rewriter.
+ */
+public class LexerVsDomRewriteBenchmark {
+
+  // Number of rewrite iterations timed per measurement.
+  private int numRuns;
+  // Full text of the input file; every run rewrites this same string.
+  private String content;
+
+  // Not exercised at the moment: the Caja runs in the constructor are commented out.
+  private GadgetHtmlParser cajaParser = new CajaHtmlParser(
+      new ParseModule.DOMImplementationProvider().get());
+
+  private GadgetHtmlParser nekoParser = new NekoHtmlParser(
+      new ParseModule.DOMImplementationProvider().get());
+
+  private GadgetHtmlParser nekoSimpleParser = new NekoSimplifiedHtmlParser(
+      new ParseModule.DOMImplementationProvider().get());
+
+  // Caja lexer
+  private Map<String, HtmlTagTransformer> defaultTransformerMap;
+  private URI dummyUri;
+
+  private LinkingTagContentRewriter domRewriter;
+  // While true, output() stays silent so warm-up timings are not printed.
+  private boolean warmup;
+
+  /**
+   * Loads the input file, runs one silent warm-up pass of every variant,
+   * pauses, and then runs and prints the measured pass.
+   *
+   * Exits the JVM with status 1 if the file is missing or unreadable.
+   *
+   * @param file path of the HTML file to rewrite; read as UTF-8
+   * @param numRuns iterations per measurement
+   */
+  private LexerVsDomRewriteBenchmark(String file, int numRuns) throws Exception {
+    File inputFile = new File(file);
+    if (!inputFile.exists() || !inputFile.canRead()) {
+      System.err.println("Input file: " + file + " not found or can't be read.");
+      System.exit(1);
+    }
+
+    // Identity rewriter: only the cost of the rewriting machinery is measured.
+    LinkRewriter linkRewriter = new LinkRewriter() {
+      public String rewrite(String link, URI context) {
+        return link;
+      }
+    };
+
+    // Lexer setup
+    dummyUri = new URI("http://www.w3c.org");
+    URI relativeBase = new URI("http://a.b.com/");
+    LinkingTagRewriter lexerRewriter = new LinkingTagRewriter(linkRewriter, relativeBase);
+    defaultTransformerMap = new HashMap<String, HtmlTagTransformer>();
+    for (String tag : lexerRewriter.getSupportedTags()) {
+      defaultTransformerMap.put(tag, lexerRewriter);
+    }
+    // End lexer setup
+
+    // DOM setup
+    domRewriter = new LinkingTagContentRewriter(linkRewriter, null);
+    // End DOM setup
+
+    // Close the stream once read, and decode with an explicit charset so the
+    // benchmark input does not depend on the platform default encoding.
+    FileInputStream in = new FileInputStream(inputFile);
+    try {
+      content = new String(IOUtils.toByteArray(in), "UTF-8");
+    } finally {
+      IOUtils.closeQuietly(in);
+    }
+    this.numRuns = numRuns;
+    warmup = true;
+    runLexer();
+    //run(cajaParser);
+    run(nekoParser);
+    run(nekoSimpleParser);
+    // Pause between the warm-up and the measured pass.
+    Thread.sleep(5000L);
+    warmup = false;
+    System.out.println("Lexer------");
+    runLexer();
+    //System.out.println("Caja-------");
+    //run(cajaParser);
+    System.out.println("Neko-------");
+    run(nekoParser);
+    System.out.println("NekoSimple-------");
+    run(nekoSimpleParser);
+  }
+
+  /** Prints the line unless the benchmark is still in its warm-up pass. */
+  private void output(String content) {
+    if (!warmup) {
+      System.out.println(content);
+    }
+  }
+
+  /** Times numRuns lexer-based rewrites of the content and reports the result. */
+  private void runLexer() throws Exception {
+    long startTime = System.currentTimeMillis();
+    for (int i = 0; i < numRuns; i++) {
+      HtmlRewriter.rewrite(content, dummyUri, defaultTransformerMap);
+    }
+    long time = System.currentTimeMillis() - startTime;
+    output("Lexer Rewrite [" + time + " ms total: " +
+        ((double)time)/numRuns + "ms/run]");
+  }
+
+  /**
+   * Times numRuns full DOM round trips - parse, rewrite, serialize - using the
+   * given parser and reports the result.
+   */
+  private void run(GadgetHtmlParser parser) throws Exception {
+    long startTime = System.currentTimeMillis();
+    for (int i = 0; i < numRuns; i++) {
+      Document document = parser.parseDom(content);
+      domRewriter.rewrite(document, dummyUri);
+      HtmlSerializer.serialize(document);
+    }
+    long time = System.currentTimeMillis() - startTime;
+    output("DOM Rewrite [" + time + " ms total: " +
+        ((double)time)/numRuns + "ms/run]");
+
+  }
+
+
+  /**
+   * Command-line entry point.
+   *
+   * @param args the input file path followed by the number of runs
+   */
+  public static void main(String[] args) {
+    // Can be run as a standalone program to compare the lexer-based rewriter
+    // with the DOM-based rewriter on a given input file.
+    if (args.length != 2) {
+      System.err.println("Args: <input-file> <num-runs>");
+      System.exit(1);
+    }
+
+    String fileArg = args[0];
+    String runsArg = args[1];
+    int numRuns = -1;
+    try {
+      numRuns = Integer.parseInt(runsArg);
+    } catch (NumberFormatException e) {
+      System.err.println("Invalid num-runs argument: " + runsArg + ", reason: " + e);
+      // Do not benchmark with a meaningless run count.
+      System.exit(1);
+    }
+    try {
+      new LexerVsDomRewriteBenchmark(fileArg, numRuns);
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+  }
+
+}
Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriterTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriterTest.java?rev=710176&r1=710175&r2=710176&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriterTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriterTest.java Mon Nov 3 13:41:23 2008
@@ -51,10 +51,10 @@
+ "<IMG src=\"http://a.b.com/img2.gif\"/>"
+ "<eMbeD src=\"http://a.b.com/some.mov\"/>"
+ "<link href=\"http://a.b.com/link.html\"></link>";
- String expected = "<IMG src=\"" + LINK_PREFIX + "http://a.b.com/img.gif\">"
- + "<IMG src=\"" + LINK_PREFIX + "http://a.b.com/img2.gif\">"
- + "<EMBED src=\"" + LINK_PREFIX + "http://a.b.com/some.mov\"></EMBED>"
- + "<LINK href=\"" + LINK_PREFIX + "http://a.b.com/link.html\">";
+ String expected = "<img src=\"" + LINK_PREFIX + "http://a.b.com/img.gif\">"
+ + "<img src=\"" + LINK_PREFIX + "http://a.b.com/img2.gif\">"
+ + "<embed src=\"" + LINK_PREFIX + "http://a.b.com/some.mov\"></embed>"
+ + "<link href=\"" + LINK_PREFIX + "http://a.b.com/link.html\">";
Document document = htmlParser.parseDom(s);
String rewritten = rewriteHelper(rewriter, s, document);
assertEquals(rewritten, expected);
Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriterTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriterTest.java?rev=710176&r1=710175&r2=710176&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriterTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriterTest.java Mon Nov 3 13:41:23 2008
@@ -18,6 +18,7 @@
*/
package org.apache.shindig.gadgets.rewrite;
+import com.google.common.collect.Sets;
import com.google.inject.Guice;
import com.google.inject.Injector;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
@@ -64,7 +65,9 @@
".someid {background-image:url(\"" + LINK_PREFIX + "http://a.b.com/bigimg.png\");float:right;width:165px;height:23px;margin-top:4px;margin-left:5px}";
// Rewrite, document is mutated in-place
rewriteHelper(rewriter, s, document);
- assertEquals(rewritten, document.getElementsByTagName("STYLE").item(0).getTextContent());
+ assertEquals(rewritten,
+ HtmlContentRewriter.getElementsByTagNameCaseInsensitive(document,
+ Sets.newHashSet("style")).get(0).getTextContent());
}
public void testStyleTagRewritesIgnoredOnBadParse() throws Exception {