You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@shindig.apache.org by lr...@apache.org on 2008/10/25 03:17:10 UTC

svn commit: r707804 [1/2] - in /incubator/shindig/trunk/java/gadgets/src: main/java/org/apache/shindig/gadgets/parse/ main/java/org/apache/shindig/gadgets/parse/caja/ main/java/org/apache/shindig/gadgets/parse/nekohtml/ main/java/org/apache/shindig/gad...

Author: lryan
Date: Fri Oct 24 18:17:10 2008
New Revision: 707804

URL: http://svn.apache.org/viewvc?rev=707804&view=rev
Log:
See thread on shindig-dev about performance of the various DOM parsing strategies.
Convert the existing rewriters to use w3c dom.
Eliminate the old ParsedHtmlXXX classes
Use Xerces HTMLSerializer class for more consistent HTML output formatting

Removed:
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/AbstractCachingGadgetHtmlParser.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlNode.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/ParseTreeSerializer.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/ParsedHtmlAttribute.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/ParsedHtmlNode.java
    incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/GadgetHtmlNodeTest.java
Modified:
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/CajaHtmlParser.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/HtmlContentRewriter.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriter.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriter.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriter.java
    incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java
    incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java
    incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/FeatureBasedRewriterTestBase.java
    incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriterTest.java
    incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriterTest.java
    incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java
    incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriterTest.java

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java Fri Oct 24 18:17:10 2008
@@ -17,12 +17,9 @@
  */
 package org.apache.shindig.gadgets.parse;
 
+import com.google.inject.ImplementedBy;
 import org.apache.shindig.gadgets.GadgetException;
 import org.apache.shindig.gadgets.parse.caja.CajaHtmlParser;
-
-import com.google.inject.ImplementedBy;
-
-import org.w3c.dom.Node;
 import org.w3c.dom.Document;
 
 /**
@@ -45,8 +42,6 @@
     return normalized.contains("<!DOCTYPE") || normalized.contains("<HTML");
   }
 
-  public abstract java.util.List<ParsedHtmlNode> parse(String source) throws GadgetException;
-
   /**
    * @param source
    * @return a parsed document or document fragment

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/CajaHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/CajaHtmlParser.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/CajaHtmlParser.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/CajaHtmlParser.java Fri Oct 24 18:17:10 2008
@@ -17,36 +17,25 @@
  */
 package org.apache.shindig.gadgets.parse.caja;
 
-import org.apache.shindig.gadgets.GadgetException;
-import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
-import org.apache.shindig.gadgets.parse.ParsedHtmlAttribute;
-import org.apache.shindig.gadgets.parse.ParsedHtmlNode;
-
-import com.google.caja.lexer.CharProducer;
-import com.google.caja.lexer.HtmlLexer;
-import com.google.caja.lexer.HtmlTokenType;
-import com.google.caja.lexer.InputSource;
-import com.google.caja.lexer.ParseException;
-import com.google.caja.lexer.TokenQueue;
+import com.google.caja.lexer.*;
 import com.google.caja.parser.html.DomParser;
 import com.google.caja.parser.html.DomTree;
 import com.google.caja.reporting.MessageQueue;
 import com.google.caja.reporting.SimpleMessageQueue;
+import com.google.inject.Inject;
 import com.google.inject.Provider;
 import com.google.inject.Singleton;
-import com.google.inject.Inject;
-
+import org.apache.shindig.gadgets.GadgetException;
+import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
 import org.w3c.dom.Attr;
+import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
-import org.w3c.dom.Document;
 import org.w3c.dom.html.HTMLDocument;
 
 import java.io.StringReader;
 import java.net.URI;
 import java.net.URISyntaxException;
-import java.util.ArrayList;
-import java.util.List;
 
 /**
  * Caja-based implementation of a {@code GadgetHtmlParser}.
@@ -62,17 +51,6 @@
   }
 
   @Override
-  public List<ParsedHtmlNode> parse(String source) throws GadgetException {
-    DomTree domTree = getFragment(source);
-    List<ParsedHtmlNode> nodes =
-        new ArrayList<ParsedHtmlNode>(domTree.children().size());
-    for (DomTree child : domTree.children()) {
-      nodes.add(new CajaParsedHtmlNode(child));
-    }
-    return nodes;
-  }
-
-  @Override
   public Document parseDom(String source) throws GadgetException {
     // Wrap the whole thing in a top-level node to get full contents.
     return makeDocument(getFragment(source));
@@ -149,85 +127,4 @@
       // TODO Implement for comment, fragment etc...
     }
   }
-
-  /**
-   * {@code ParsedHtmlNode} implementation built using Caja parsing primitives.
-   */
-  private static class CajaParsedHtmlNode implements ParsedHtmlNode {
-    private final List<ParsedHtmlAttribute> attributes;
-    private final List<ParsedHtmlNode> children;
-    private final String name;
-    private final String text;
-    
-    private CajaParsedHtmlNode(DomTree elem) {
-      if (elem instanceof DomTree.Tag) {
-        DomTree.Tag tag = (DomTree.Tag)elem;
-        attributes = new ArrayList<ParsedHtmlAttribute>(1);
-        children = new ArrayList<ParsedHtmlNode>();
-        name = tag.getTagName();
-        text = null;
-        for (DomTree child : elem.children()) {
-          if (child instanceof DomTree.Attrib) {
-            attributes.add(new CajaParsedHtmlAttribute((DomTree.Attrib)child));
-          } else {
-            children.add(new CajaParsedHtmlNode(child));
-          }
-        }
-      } else if (elem instanceof DomTree.Text ||
-                 elem instanceof DomTree.CData) {
-        // DomTree.CData can theoretically occur since it's supported
-        // in HTML5, but the implementation doesn't supply this yet.
-        attributes = null;
-        children = null;
-        name = null;
-        text = ((DomTree.Text)elem).getValue();
-      } else {
-        // This should never happen. The only remaining types are
-        // DomTree.Fragment, which is simply a top-level container
-        // that results from the DomTree.parseFragment() method,
-        // and DomTree.Value, which is always a child of DomTree.Attrib.
-        attributes = null;
-        children = null;
-        name = null;
-        text = null;
-      }
-    }
-    
-    public List<ParsedHtmlAttribute> getAttributes() {
-      return attributes;
-    }
-
-    public List<ParsedHtmlNode> getChildren() {
-      return children;
-    }
-
-    public String getTagName() {
-      return name;
-    }
-
-    public String getText() {
-      return text;
-    }
-  }
-  
-  /**
-   * {@code ParsedHtmlAttribute} built from a Caja DomTree primitive.
-   */
-  private static class CajaParsedHtmlAttribute implements ParsedHtmlAttribute {
-    private final String name;
-    private final String value;
-    
-    private CajaParsedHtmlAttribute(DomTree.Attrib attrib) {
-      name = attrib.getAttribName();
-      value = attrib.getAttribValue();
-    }
-    
-    public String getName() {
-      return name;
-    }
-
-    public String getValue() {
-      return value;
-    }
-  }
 }

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/nekohtml/NekoHtmlParser.java Fri Oct 24 18:17:10 2008
@@ -17,30 +17,21 @@
  */
 package org.apache.shindig.gadgets.parse.nekohtml;
 
+import com.google.inject.Inject;
+import com.google.inject.Provider;
 import org.apache.shindig.common.xml.XmlUtil;
 import org.apache.shindig.gadgets.GadgetException;
 import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
-import org.apache.shindig.gadgets.parse.ParsedHtmlAttribute;
-import org.apache.shindig.gadgets.parse.ParsedHtmlNode;
-
-import com.google.common.collect.Lists;
-import com.google.inject.Provider;
-import com.google.inject.Inject;
-
 import org.cyberneko.html.parsers.DOMFragmentParser;
 import org.w3c.dom.Document;
 import org.w3c.dom.DocumentFragment;
-import org.w3c.dom.NamedNodeMap;
 import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
 import org.w3c.dom.html.HTMLDocument;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.util.Collections;
-import java.util.List;
 
 /**
  * Parser that uses the NekoHtml parser.
@@ -59,15 +50,7 @@
     this.documentProvider = documentProvider;
   }
 
-  public List<ParsedHtmlNode> parse(String source) throws GadgetException {
-    try {
-      Document doc = parseFragment(source);
-      return unwrapNodeList(doc.getFirstChild().getChildNodes());
-    } catch (Exception e) {
-      throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e);
-    }
-  }
-
+  @Override
   public Document parseDom(String source) throws GadgetException {
     try {
       return parseFragment(source);
@@ -92,80 +75,4 @@
     }
     return htmlDoc;
   }
-
-  private static List<ParsedHtmlNode> unwrapNodeList(NodeList nodeList) {
-    if (nodeList == null) return Collections.emptyList();
-    List<ParsedHtmlNode> list = Lists.newArrayListWithExpectedSize(nodeList.getLength());
-    for (int i = 0; i < nodeList.getLength(); i++) {
-      list.add(new NodeWrapper(nodeList.item(i)));
-    }
-    return list;
-  }
-
-  private static List<ParsedHtmlAttribute> unwrapAttributeList(NamedNodeMap attrList) {
-    if (attrList == null) return Collections.emptyList();
-    List<ParsedHtmlAttribute> list = Lists.newArrayListWithExpectedSize(attrList.getLength());
-    for (int i = 0; i < attrList.getLength(); i++) {
-      list.add(new AttributeWrapper(attrList.item(i)));     
-    }
-    return list;
-  }
-
-  static class NodeWrapper implements ParsedHtmlNode {
-    private Node wrapped;
-    private List<ParsedHtmlAttribute> attributes;
-    private List<ParsedHtmlNode> children;
-
-    NodeWrapper(Node wrapped) {
-      this.wrapped = wrapped;
-      getChildren();
-      getAttributes();
-    }
-
-    public String getTagName() {
-      if (wrapped.getNodeType() == Node.TEXT_NODE) return null;
-      return wrapped.getNodeName();
-    }
-
-    public List<ParsedHtmlAttribute> getAttributes() {
-      if (wrapped.getNodeType() == Node.TEXT_NODE) return null;
-      if (attributes == null) {
-        attributes = unwrapAttributeList(wrapped.getAttributes());
-      }
-      return attributes;
-    }
-
-    public List<ParsedHtmlNode> getChildren() {
-      if (wrapped.getNodeType() == Node.TEXT_NODE) return null;
-      if (children == null) {
-        children = unwrapNodeList(wrapped.getChildNodes());
-      }
-      // YUK!
-      if (children.isEmpty()) return null;
-      return children;
-    }
-
-    public String getText() {
-      if (wrapped.getNodeType() == Node.TEXT_NODE ) {
-        return wrapped.getTextContent();
-      }
-      return null;
-    }
-  }
-
-  static class AttributeWrapper implements ParsedHtmlAttribute {
-    private Node wrapped;
-
-    AttributeWrapper(Node wrapped) {
-      this.wrapped = wrapped;
-    }
-
-    public String getName() {
-      return wrapped.getNodeName();
-    }
-
-    public String getValue() {
-      return wrapped.getNodeValue();
-    }
-  }
 }

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/HtmlContentRewriter.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/HtmlContentRewriter.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/HtmlContentRewriter.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/HtmlContentRewriter.java Fri Oct 24 18:17:10 2008
@@ -22,8 +22,8 @@
 import org.apache.shindig.gadgets.Gadget;
 import org.apache.shindig.gadgets.http.HttpRequest;
 import org.apache.shindig.gadgets.http.HttpResponse;
-import org.apache.shindig.gadgets.parse.GadgetHtmlNode;
 import org.apache.shindig.gadgets.spec.View;
+import org.w3c.dom.Document;
 
 import java.net.URI;
 
@@ -35,7 +35,7 @@
  */
 public abstract class HtmlContentRewriter implements ContentRewriter {
 
-  protected abstract RewriterResults rewrite(GadgetHtmlNode root, URI baseUri);
+  protected abstract RewriterResults rewrite(Document doc, URI baseUri);
 
   public static String getMimeType(HttpRequest request, HttpResponse original) {
     String mimeType = request.getRewriteMimeType();
@@ -49,7 +49,7 @@
       MutableContent content) {
     String mimeType = getMimeType(request, original);
     if (mimeType.toLowerCase().contains("html")) {
-      return rewriteHtml(content.getParseTree(), request.getUri().toJavaUri());
+      return rewriteHtml(content.getDocument(), request.getUri().toJavaUri());
     }
     return null;
   }
@@ -60,12 +60,12 @@
     if (view != null && view.getHref() != null) {
       base = view.getHref();
     }
-    return rewriteHtml(content.getParseTree(), base.toJavaUri());
+    return rewriteHtml(content.getDocument(), base.toJavaUri());
   }
 
-  private RewriterResults rewriteHtml(GadgetHtmlNode root, URI baseUri) {
-    if (root != null) {
-      return rewrite(root, baseUri);
+  private RewriterResults rewriteHtml(Document doc, URI baseUri) {
+    if (doc != null) {
+      return rewrite(doc, baseUri);
     }
     return null;
   }

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriter.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriter.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriter.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriter.java Fri Oct 24 18:17:10 2008
@@ -18,24 +18,23 @@
  */
 package org.apache.shindig.gadgets.rewrite;
 
+import com.google.common.collect.Lists;
 import org.apache.shindig.common.uri.Uri;
 import org.apache.shindig.common.util.Utf8UrlCoder;
 import org.apache.shindig.gadgets.Gadget;
 import org.apache.shindig.gadgets.http.HttpRequest;
 import org.apache.shindig.gadgets.http.HttpResponse;
-import org.apache.shindig.gadgets.parse.GadgetHtmlNode;
 import org.apache.shindig.gadgets.servlet.ProxyBase;
 import org.apache.shindig.gadgets.spec.GadgetSpec;
 import org.apache.shindig.gadgets.spec.View;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
 
 import java.io.UnsupportedEncodingException;
-import java.net.URI;
-import java.net.URISyntaxException;
 import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Queue;
 
 public class JsTagConcatContentRewriter implements ContentRewriter {
   private final static int MAX_URL_LENGTH = 1500;
@@ -68,93 +67,90 @@
       return null;
     }
 
-    // Bootstrap queue of children over which to iterate,
-    // ie. lists of siblings to potentially combine
-    Queue<GadgetHtmlNode> nodesToProcess = new LinkedList<GadgetHtmlNode>();
-    nodesToProcess.add(content.getParseTree());
+    // Get all the script tags
+    NodeList scriptTags = content.getDocument().getElementsByTagName("SCRIPT");
+
+    // Copy the NodeList because it is live (it reflects changes to the underlying document),
+    // which is a behavior we don't want when removing nodes
+    List<Node> nodeList = Lists.newArrayListWithExpectedSize(scriptTags.getLength());
+    for (int i = 0; i < scriptTags.getLength(); i++) {
+      nodeList.add(scriptTags.item(i));
+    }
 
     String concatBase = getJsConcatBase(gadget.getSpec(), rewriterFeature);
+    Uri contentBase = gadget.getSpec().getUrl();
+    View view = gadget.getCurrentView();
+    if (view != null && view.getHref() != null) {
+      contentBase = view.getHref();
+    }
 
-    while (!nodesToProcess.isEmpty()) {
-      GadgetHtmlNode parentNode = nodesToProcess.remove();
-      if (!parentNode.isText()) {
-        List<GadgetHtmlNode> childList = parentNode.getChildren();
-
-        // Iterate over children next in depth-first fashion.
-        // Text nodes (such as <script src> processed here) will be ignored.
-        nodesToProcess.addAll(childList);
-
-        List<GadgetHtmlNode> toRemove = new ArrayList<GadgetHtmlNode>();
-        List<URI> scripts = new ArrayList<URI>();
-        boolean processScripts = false;
-        for (int i = 0; i < childList.size(); ++i) {
-          GadgetHtmlNode cur = childList.get(i);
-
-          // Find consecutive <script src=...> tags
-          if (!cur.isText() &&
-               cur.getTagName().equalsIgnoreCase("script") &&
-               cur.hasAttribute("src")) {
-            URI scriptUri = null;
-            try {
-              Uri base = gadget.getSpec().getUrl();
-              View view = gadget.getCurrentView();
-              if (view != null && view.getHref() != null) {
-                base = view.getHref();
-              }
-              scriptUri = base.resolve(Uri.parse(cur.getAttributeValue("src"))).toJavaUri();
-            } catch (IllegalArgumentException e) {
-              // Same behavior as JavascriptTagMerger
-              // Perhaps switch to ignoring script src instead?
-              throw new RuntimeException(e);
-            }
-            scripts.add(scriptUri);
-            toRemove.add(cur);
-          } else if (scripts.size() > 0 && cur.isText() && cur.getText().matches("\\s*")) {
-            // Whitespace after one or more scripts. Ignore and remove.
-            toRemove.add(cur);
-          } else if (scripts.size() > 0) {
-            processScripts = true;
-          }
-
-          if (i == (childList.size() - 1)) {
-            processScripts = true;
-          }
-
-          if (processScripts && scripts.size() > 0) {
-            // Tags found. Concatenate scripts together.
-            List<URI> concatUris = getConcatenatedUris(concatBase, scripts);
-
-            // Insert concatenated nodes before first match
-            for (URI concatUri : concatUris) {
-              GadgetHtmlNode newScript = new GadgetHtmlNode("script", null);
-              newScript.setAttribute("src", concatUri.toString());
-              parentNode.insertBefore(newScript, toRemove.get(0));
-            }
-
-            // Remove contributing match nodes
-            for (GadgetHtmlNode remove : toRemove) {
-              parentNode.removeChild(remove);
-            }
-
-            processScripts = false;
-            scripts.clear();
-            toRemove.clear();
-          }
+    boolean mutated = false;
+    List<Node> concatenateable = new ArrayList<Node>();
+    for (int i = 0; i < nodeList.size(); i++) {
+      Node scriptTag = nodeList.get(i);
+      Node nextSciptTag = null;
+      if (i + 1 < nodeList.size()) {
+        nextSciptTag = nodeList.get(i+1);
+      }
+      Node src = scriptTag.getAttributes().getNamedItem("src");
+      if (src != null) {
+        mutated = true;
+        concatenateable.add(scriptTag);
+        if (nextSciptTag == null ||
+            !nextSciptTag.equals(getNextSiblingElement(scriptTag))) {
+          // Next tag is not concatenateable
+          concatenateTags(concatenateable, concatBase, contentBase);
+          concatenateable.clear();
         }
+      } else {
+        concatenateTags(concatenateable, concatBase, contentBase);
+        concatenateable.clear();
       }
     }
+    concatenateTags(concatenateable, concatBase, contentBase);
+
+    if (mutated) {
+      MutableContent.notifyEdit(content.getDocument());
+    }
 
     return RewriterResults.cacheableIndefinitely();
   }
 
-  private List<URI> getConcatenatedUris(String concatBase, List<URI> uris) {
-    List<URI> concatUris = new LinkedList<URI>();
+  private void concatenateTags(List<Node> tags, String concatBase, Uri contentBase) {
+    List<Uri> scriptSrcList = Lists.newArrayListWithExpectedSize(tags.size());
+    for (Node scriptNode : tags) {
+      try {
+        scriptSrcList.add(
+            contentBase.resolve(
+                Uri.parse(scriptNode.getAttributes().getNamedItem("src").getNodeValue())));
+      } catch (IllegalArgumentException e) {
+        // Same behavior as JavascriptTagMerger
+        // Perhaps switch to ignoring script src instead?
+        throw new RuntimeException(e);
+      }
+    }
+
+    List<Uri> concatented = getConcatenatedUris(concatBase, scriptSrcList);
+    for (int i = 0; i < tags.size(); i++) {
+      if (i < concatented.size()) {
+        // Set new URLs into existing tags
+        tags.get(i).getAttributes().getNamedItem("src").setNodeValue(
+            concatented.get(i).toString());
+      } else {
+        // Remove remainder
+        tags.get(i).getParentNode().removeChild(tags.get(i));
+      }
+    }
+  }
+
+  private List<Uri> getConcatenatedUris(String concatBase, List<Uri> uris) {
+    List<Uri> concatUris = new LinkedList<Uri>();
     int paramIndex = 1;
     StringBuilder builder = null;
     int maxUriLen = MAX_URL_LENGTH + concatBase.length();
     try {
       int uriIx = 0, lastUriIx = (uris.size() - 1);
-      for (URI uri : uris) {
+      for (Uri uri : uris) {
         if (paramIndex == 1) {
           builder = new StringBuilder(concatBase);
         } else {
@@ -165,7 +161,7 @@
         if (builder.length() > maxUriLen ||
             uriIx == lastUriIx) {
           // Went over URI length warning limit or on the last uri
-          concatUris.add(new URI(builder.toString()));
+          concatUris.add(Uri.parse(builder.toString()));
           builder = null;
           paramIndex = 0;
         }
@@ -174,8 +170,6 @@
       }
     } catch (UnsupportedEncodingException e) {
       throw new RuntimeException(e);
-    } catch (URISyntaxException e) {
-      throw new RuntimeException(e);
     }
     return concatUris;
   }
@@ -191,4 +185,12 @@
            '&';
   }
 
+  private Node getNextSiblingElement(Node n) {
+    n = n.getNextSibling();
+    while (n != null && n.getNodeType() != Node.ELEMENT_NODE) {
+      n = n.getNextSibling();
+    }
+    return n;
+  }
+
 }

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriter.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriter.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriter.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriter.java Fri Oct 24 18:17:10 2008
@@ -18,24 +18,22 @@
  */
 package org.apache.shindig.gadgets.rewrite;
 
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.Map;
-import java.util.Queue;
-import java.util.Set;
-
-import org.apache.shindig.gadgets.parse.GadgetHtmlNode;
+import org.w3c.dom.Document;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.traversal.DocumentTraversal;
+import org.w3c.dom.traversal.NodeFilter;
+import org.w3c.dom.traversal.NodeIterator;
 
 import java.net.URI;
+import java.util.*;
 
 public class LinkingTagContentRewriter extends HtmlContentRewriter {
   private final LinkRewriter linkRewriter;
   private final Map<String, Set<String>> tagAttributeTargets;
-  
+
   public LinkingTagContentRewriter(LinkRewriter linkRewriter,
-      Map<String, Set<String>> attributeTargets) {
+                                   Map<String, Set<String>> attributeTargets) {
     this.linkRewriter = linkRewriter;
     if (attributeTargets != null) {
       this.tagAttributeTargets = attributeTargets;
@@ -45,44 +43,55 @@
   }
 
   @Override
-  protected RewriterResults rewrite(GadgetHtmlNode root, URI baseUri) {
-	if (linkRewriter == null) {
-	  // Sanity test.
-	  return null;
-	}
-	  
-    Queue<GadgetHtmlNode> nodesToProcess = new LinkedList<GadgetHtmlNode>();
-    nodesToProcess.addAll(root.getChildren());
-
-    while (!nodesToProcess.isEmpty()) {
-      GadgetHtmlNode curNode = nodesToProcess.remove();
-      if (!curNode.isText()) {
-        // Depth-first iteration over children. Order doesn't matter anyway.
-        nodesToProcess.addAll(curNode.getChildren());
-
-        Set<String> curTagAttrs =
-            tagAttributeTargets.get(curNode.getTagName().toLowerCase());
-        if (curTagAttrs != null) {
-          for (String attrKey : curNode.getAttributeKeys()) {
-            if (curTagAttrs.contains(attrKey.toLowerCase())) {
-              String attrValue = curNode.getAttributeValue(attrKey);
-               // Attribute marked for rewriting: do it!
-              curNode.setAttribute(attrKey, linkRewriter.rewrite(attrValue, baseUri));
-            }
-          }
-        }
-      }
+  protected RewriterResults rewrite(Document root, final URI baseUri) {
+    if (linkRewriter == null) {
+      // Sanity test.
+      return null;
     }
+    boolean mutated = false;
+
+    if (root instanceof DocumentTraversal) {
+      NodeIterator nodeIterator = ((DocumentTraversal) root)
+          .createNodeIterator(root, NodeFilter.SHOW_ELEMENT,
+              new NodeFilter() {
+                public short acceptNode(Node n) {
+                  Set<String> stringSet = tagAttributeTargets.get(n.getNodeName());
+                  if (stringSet != null) {
+                    NamedNodeMap attributes = n.getAttributes();
+                    // TODO - Check if NamedNodeMap lookup is case-insensitive; if so, use that
+                    for (String attribute : stringSet) {
+                      for (int j = 0; j < attributes.getLength(); j++) {
+                        Node attributeNode = attributes.item(j);
+                        if (attributeNode.getNodeName().equalsIgnoreCase(attribute)) {
+                          attributeNode.setNodeValue(linkRewriter.rewrite(
+                              attributeNode.getNodeValue(), baseUri));
+                        }
+                      }
+                    }
+                    return NodeFilter.FILTER_ACCEPT;
+                  } else {
+                    return NodeFilter.FILTER_REJECT;
+                  }
+                }
+              }, false);
       
+      while (nodeIterator.nextNode() != null) {
+        mutated= true;
+      }
+    }
+
+    if (mutated) {
+      MutableContent.notifyEdit(root);
+    }
+
     return RewriterResults.cacheableIndefinitely();
   }
 
   private static Map<String, Set<String>> getDefaultTargets() {
-    Map<String, Set<String>> targets  = new HashMap<String, Set<String>>();
-    targets.put("img", new HashSet<String>(Arrays.asList("src")));
-    targets.put("embed", new HashSet<String>(Arrays.asList("src")));
-    targets.put("link", new HashSet<String>(Arrays.asList("href")));
+    Map<String, Set<String>> targets = new HashMap<String, Set<String>>();
+    targets.put("IMG", new HashSet<String>(Arrays.asList("src")));
+    targets.put("EMBED", new HashSet<String>(Arrays.asList("src")));
+    targets.put("LINK", new HashSet<String>(Arrays.asList("href")));
     return targets;
   }
-
-}
+}
\ No newline at end of file

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java Fri Oct 24 18:17:10 2008
@@ -17,14 +17,14 @@
  */
 package org.apache.shindig.gadgets.rewrite;
 
-import java.io.IOException;
-import java.io.StringWriter;
-import java.util.List;
-
 import org.apache.shindig.gadgets.GadgetException;
-import org.apache.shindig.gadgets.parse.GadgetHtmlNode;
 import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
-import org.apache.shindig.gadgets.parse.ParsedHtmlNode;
+import org.apache.xml.serialize.HTMLSerializer;
+import org.apache.xml.serialize.OutputFormat;
+import org.w3c.dom.Document;
+
+import java.io.IOException;
+import java.io.StringWriter;
 
 /**
  * Object that maintains a String representation of arbitrary contents
@@ -32,19 +32,26 @@
  */
 public class MutableContent {
   private String content;
-  private GadgetHtmlNode parseTree;
+  private Document document;
   private ContentEditListener editListener;
   private int parseEditId;
   private int contentParseId;
   private final GadgetHtmlParser contentParser;
 
+  private static final String MUTABLE_CONTENT_LISTENER = "MutableContentListener";
+
+  public static void notifyEdit(Document doc) {
+    ContentEditListener listener = (ContentEditListener)doc.getUserData(MUTABLE_CONTENT_LISTENER);
+    if (listener != null) {
+      listener.nodeEdited();
+    }
+  }
+
   public MutableContent(GadgetHtmlParser contentParser) {
     this.contentParser = contentParser;
     this.contentParseId = parseEditId = 0;
   }
 
-  public static final String ROOT_NODE_TAG_NAME = "gadget-root";
-  
   /**
    * Retrieves the current content for this object in String form.
    * If content has been retrieved in parse tree form and has
@@ -63,13 +70,12 @@
       // per rendering cycle: all rewriters (or other manipulators)
       // operating on the parse tree should happen together.
       contentParseId = parseEditId;
-      StringWriter sw = new StringWriter();
-      for (GadgetHtmlNode node : parseTree.getChildren()) {
-        try {
-          node.render(sw);
-        } catch (IOException e) {
-          // Never happens.
-        }
+      StringWriter sw = new StringWriter((content.length() * 10) / 9);
+
+      try {
+        new HTMLSerializer(sw, new OutputFormat(document)).serialize(document);
+      } catch (IOException e) {
+        // Never happens.
       }
       content = sw.toString();
     }
@@ -112,9 +118,9 @@
    * @return Top-level node whose children represent the gadget's contents, or
    *         null if no parser is configured, String contents are null, or contents unparseable.
    */
-  public GadgetHtmlNode getParseTree() {
-    if (parseTree != null && !editListener.stringWasEdited()) {
-      return parseTree;
+  public Document getDocument() {
+    if (document != null && !editListener.stringWasEdited()) {
+      return document;
     }
   
     if (content == null || contentParser == null) {
@@ -123,34 +129,27 @@
   
     // One ContentEditListener per parse tree.
     editListener = new ContentEditListener();
-    parseTree = new GadgetHtmlNode(ROOT_NODE_TAG_NAME, null);
-    List<ParsedHtmlNode> parsed = null;
     try {
-      parsed = contentParser.parse(content);
+      document = contentParser.parseDom(content);
+      if (document != null) {
+        document.setUserData(MUTABLE_CONTENT_LISTENER, editListener, null);
+      }
     } catch (GadgetException e) {
       // TODO: emit info message
       return null;
     }
   
-    if (parsed == null) {
-      return null;
-    }
-    
-    for (ParsedHtmlNode parsedNode : parsed) {
-      parseTree.appendChild(new GadgetHtmlNode(parsedNode, editListener));
-    }
-  
     // Parse tree created from content: edit IDs are the same
     contentParseId = parseEditId;
-    return parseTree;
+    return document;
   }
   
   // Intermediary object tracking edit behavior for the MutableContent to help maintain
   // state consistency. MutableContent.notifyEdit(Document) calls nodeEdited whenever a
   // caller reports a modification to the parsed document.
-  private class ContentEditListener implements GadgetHtmlNode.EditListener {
+  private class ContentEditListener {
     private boolean stringEdited = false;
-  
+
     public void nodeEdited() {
       ++parseEditId;
       if (stringEdited) {
@@ -159,11 +158,11 @@
         throw new IllegalStateException("Edited parse node after setting String content");
       }
     }
-  
+
     private void stringEdited() {
       stringEdited = true;
     }
-  
+
     private boolean stringWasEdited() {
       return stringEdited;
     }

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriter.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriter.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriter.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/StyleLinksContentRewriter.java Fri Oct 24 18:17:10 2008
@@ -22,12 +22,12 @@
 import org.apache.shindig.gadgets.Gadget;
 import org.apache.shindig.gadgets.http.HttpRequest;
 import org.apache.shindig.gadgets.http.HttpResponse;
-import org.apache.shindig.gadgets.parse.GadgetHtmlNode;
 import org.apache.shindig.gadgets.spec.View;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
 
 import java.net.URI;
-import java.util.LinkedList;
-import java.util.Queue;
 
 public class StyleLinksContentRewriter implements ContentRewriter {
   // TODO: consider providing helper base class for node-visitor content rewriters
@@ -44,7 +44,7 @@
       MutableContent content) {
     String mimeType = HtmlContentRewriter.getMimeType(request, original);
     if (mimeType.contains("html")) {
-      rewriteHtml(content.getParseTree(), request.getUri().toJavaUri());
+      rewriteHtml(content.getDocument(), request.getUri().toJavaUri());
     } else if (mimeType.contains("css")) {
       content.setContent(rewriteCss(content.getContent(), request.getUri().toJavaUri()));
     }
@@ -65,49 +65,44 @@
       base = view.getHref();
     }
 
-    return rewriteHtml(content.getParseTree(), base.toJavaUri());
+    return rewriteHtml(content.getDocument(), base.toJavaUri());
   }
 
-  private RewriterResults rewriteHtml(GadgetHtmlNode root, URI baseUri) {
-    if (root == null) {
+  private RewriterResults rewriteHtml(Document doc, URI baseUri) {
+    if (doc == null) {
       return null;
     }
+    boolean mutated = false;
 
-    Queue<GadgetHtmlNode> nodesToProcess =
-      new LinkedList<GadgetHtmlNode>();
-
-    nodesToProcess.addAll(root.getChildren());
-
-    while (!nodesToProcess.isEmpty()) {
-      GadgetHtmlNode curNode = nodesToProcess.remove();
-      if (!curNode.isText()) {
-        // Depth-first iteration over children. Order doesn't matter anyway.
-        nodesToProcess.addAll(curNode.getChildren());
-
-        if (curNode.getTagName().equalsIgnoreCase("style")) {
-          String styleText = getNodeChildText(curNode);
-          curNode.clearChildren();
-          curNode.appendChild(new GadgetHtmlNode(rewriteCss(styleText, baseUri)));
-        }
+    Node head;
+    NodeList headTags = doc.getElementsByTagName("HEAD");
+    if (headTags.getLength() == 0) {
+      mutated = true;
+      head = doc.getDocumentElement().appendChild(doc.createElement("HEAD"));
+    } else {
+      head = headTags.item(0);
+    }
+
+    // Move all style tags into head
+    // TODO Convert all @imports into a concatenated link tag
+    NodeList styleTags = doc.getElementsByTagName("STYLE");
+    for (int i = 0; i < styleTags.getLength(); i++) {
+      Node styleNode = styleTags.item(i);
+      mutated = true;
+      if (!styleNode.getParentNode().getNodeName().equalsIgnoreCase("HEAD")) {
+        styleNode.getParentNode().removeChild(styleNode);
+        head.appendChild(styleNode);
       }
+      styleNode.setTextContent(rewriteCss(styleNode.getTextContent(), baseUri));
     }
 
+    if (mutated) {
+      MutableContent.notifyEdit(doc);
+    }
     return RewriterResults.cacheableIndefinitely();
   }
 
   private String rewriteCss(String styleText, URI baseUri) {
     return CssRewriter.rewrite(styleText, baseUri, linkRewriter);
   }
-
-  private static String getNodeChildText(GadgetHtmlNode node) {
-    // TODO: move this to GadgetHtmlNode as a helper
-    StringBuilder builder = new StringBuilder();
-    for (GadgetHtmlNode child : node.getChildren()) {
-      if (child.isText()) {
-        builder.append(child.getText());
-      }
-    }
-    return builder.toString();
-  }
-
 }

Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/HtmlParserTest.java Fri Oct 24 18:17:10 2008
@@ -17,13 +17,17 @@
  */
 package org.apache.shindig.gadgets.parse;
 
+import junit.framework.TestCase;
 import org.apache.shindig.gadgets.parse.caja.CajaHtmlParser;
 import org.apache.shindig.gadgets.parse.nekohtml.NekoHtmlParser;
-
-import junit.framework.TestCase;
-
-import java.util.List;
-
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+/**
+ * Note these tests are of marginal use. Consider removing. More useful tests would exercise
+ * the capability of the parser to handle strange HTML.
+ */
 public class HtmlParserTest extends TestCase {
 
   private final GadgetHtmlParser cajaParser = new CajaHtmlParser(
@@ -38,16 +42,14 @@
   }
 
   private void parseSimpleString(GadgetHtmlParser htmlParser) throws Exception {
-    List<ParsedHtmlNode> nodes = htmlParser.parse("content");
-    assertNotNull(nodes);
-    assertEquals(1, nodes.size());
-    
-    ParsedHtmlNode node = nodes.get(0);
+    Document doc = htmlParser.parseDom("content");
+
+    Node node = doc.getDocumentElement().getFirstChild();
     assertNotNull(node);
-    assertEquals("content", node.getText());
+    assertEquals("content", node.getTextContent());
     assertNull(node.getAttributes());
-    assertNull(node.getChildren());
-    assertNull(node.getTagName());
+    assertNullOrEmpty(node.getChildNodes());
+    assertEquals(Node.TEXT_NODE, node.getNodeType());
   }
 
   public void testParseTagWithStringContents() throws Exception {
@@ -56,19 +58,11 @@
   }
 
   public void parseTagWithStringContents(GadgetHtmlParser htmlParser) throws Exception {
-    List<ParsedHtmlNode> nodes =
-        htmlParser.parse("<span>content</span>");
-    assertNotNull(nodes);
-    assertEquals(1, nodes.size());
-    
-    ParsedHtmlNode node = nodes.get(0);
-    assertNull(node.getText());
-    assertNotNull(node.getAttributes());
-    assertEquals(0, node.getAttributes().size());
-    assertNotNull(node.getChildren());
-    assertEquals(1, node.getChildren().size());
-    assertEquals("content", node.getChildren().get(0).getText());
-    assertEquals("span", node.getTagName().toLowerCase());
+    Document doc = htmlParser.parseDom("<span>content</span>");
+
+    Node node = doc.getDocumentElement().getFirstChild();
+    assertEquals("content", node.getTextContent());
+    assertEquals("span", node.getNodeName().toLowerCase());
   }
 
   public void testParseTagWithAttributes() throws Exception {
@@ -77,21 +71,17 @@
   }
 
   void parseTagWithAttributes(GadgetHtmlParser htmlParser) throws Exception {
-    List<ParsedHtmlNode> nodes =
-        htmlParser.parse("<div id=\"foo\">content</div>");
-    assertNotNull(nodes);
-    assertEquals(1, nodes.size());
-    
-    ParsedHtmlNode node = nodes.get(0);
+    Document doc = htmlParser.parseDom("<div id=\"foo\">content</div>");
+
+    Node node = doc.getDocumentElement().getFirstChild();
     assertNotNull(node);
-    assertNull(node.getText());
     assertNotNull(node.getAttributes());
-    assertEquals(1, node.getAttributes().size());
-    assertEquals("id", node.getAttributes().get(0).getName());
-    assertEquals("foo", node.getAttributes().get(0).getValue());
-    assertNotNull(node.getChildren());
-    assertEquals(1, node.getChildren().size());
-    assertEquals("content", node.getChildren().get(0).getText());
+    assertEquals(1, node.getAttributes().getLength());
+    assertEquals("id", node.getAttributes().item(0).getNodeName());
+    assertEquals("foo", node.getAttributes().item(0).getNodeValue());
+    assertNotNull(node.getChildNodes());
+    assertEquals(1, node.getChildNodes().getLength());
+    assertEquals("content", node.getChildNodes().item(0).getTextContent());
   }
 
   public void testParseStringUnescapesProperly() throws Exception {
@@ -100,17 +90,13 @@
   }
 
   void parseStringUnescapesProperly(GadgetHtmlParser htmlParser) throws Exception {
-    List<ParsedHtmlNode> nodes =
-        htmlParser.parse("&lt;content&amp;&apos;chrome&apos;&gt;");
-    assertNotNull(nodes);
-    assertEquals(1, nodes.size());
-    
-    ParsedHtmlNode node = nodes.get(0);
+    Document doc = htmlParser.parseDom("&lt;content&amp;&apos;chrome&apos;&gt;");
+
+    Node node = doc.getDocumentElement().getFirstChild();
     assertNotNull(node);
-    assertEquals("<content&'chrome'>", node.getText());
+    assertEquals("<content&'chrome'>", node.getTextContent());
     assertNull(node.getAttributes());
-    assertNull(node.getChildren());
-    assertNull(node.getTagName());
+    assertNullOrEmpty(node.getChildNodes());
   }
 
   public void testParseNestedContentWithNoCloseForBrAndHr() throws Exception {
@@ -119,99 +105,50 @@
   }
 
   void parseNestedContentWithNoCloseForBrAndHr(GadgetHtmlParser htmlParser) throws Exception {
-    List<ParsedHtmlNode> nodes =
-        htmlParser.parse("<div><br>  and  <hr></div>");
-    assertNotNull(nodes);
-    assertEquals(1, nodes.size());
-    
-    ParsedHtmlNode divNode = nodes.get(0);
-    assertNull(divNode.getText());
-    assertEquals("div", divNode.getTagName().toLowerCase());
+    Document doc = htmlParser.parseDom("<div><br>  and  <hr></div>");
+
+    Node divNode = doc.getDocumentElement().getFirstChild();
+    assertEquals("div", divNode.getNodeName().toLowerCase());
     assertNotNull(divNode.getAttributes());
-    assertEquals(0, divNode.getAttributes().size());
-    assertNotNull(divNode.getChildren());
-    assertEquals(3, divNode.getChildren().size());
+    assertEquals(0, divNode.getAttributes().getLength());
+    assertNotNull(divNode.getChildNodes());
+    assertEquals(3, divNode.getChildNodes().getLength());
     
     {
       // <br>
-      ParsedHtmlNode divChild = divNode.getChildren().get(0);
+      Node divChild = divNode.getChildNodes().item(0);
       assertNotNull(divChild);
-      assertEquals("br", divChild.getTagName().toLowerCase());
-      assertNull(divChild.getText());
+      assertEquals("br", divChild.getNodeName().toLowerCase());
       assertNotNull(divChild.getAttributes());
-      assertEquals(0, divChild.getAttributes().size());
-      assertNullOrEmpty(divChild.getChildren());
+      assertEquals(0, divChild.getAttributes().getLength());
+      assertEquals(0, divChild.getChildNodes().getLength());
     }
     
     {
       // text
-      ParsedHtmlNode divChild = divNode.getChildren().get(1);
-      assertEquals("  and  ", divChild.getText());
+      Node divChild = divNode.getChildNodes().item(1);
+      assertEquals("  and  ", divChild.getTextContent());
       assertNull(divChild.getAttributes());
-      assertNull(divChild.getChildren());
-      assertNull(divChild.getTagName());
+      assertNullOrEmpty(divChild.getChildNodes());
     }
     
     {
       // <hr> should be parsed leniently
-      ParsedHtmlNode divChild = divNode.getChildren().get(2);
+      Node divChild = divNode.getChildNodes().item(2);
       assertNotNull(divChild);
-      assertEquals("hr", divChild.getTagName().toLowerCase());
-      assertNull(divChild.getText());
+      assertEquals("hr", divChild.getNodeName().toLowerCase());
       assertNotNull(divChild.getAttributes());
-      assertEquals(0, divChild.getAttributes().size());
-      assertNullOrEmpty(divChild.getChildren());
+      assertEquals(0, divChild.getAttributes().getLength());
+      assertEquals(0, divChild.getChildNodes().getLength());
     }
   }
 
-  public void testParseMixedSiblings() throws Exception {
-    parseMixedSiblings(nekoParser);
-    parseMixedSiblings(cajaParser);
-  }
-
-  void parseMixedSiblings(GadgetHtmlParser htmlParser) throws Exception {
-    List<ParsedHtmlNode> nodes =
-        htmlParser.parse("content<span>more</span><div id=\"foo\">yet more</div>");
-    assertNotNull(nodes);
-    assertEquals(3, nodes.size());
-    
-    {
-      ParsedHtmlNode textNode = nodes.get(0);
-      assertEquals("content", textNode.getText());
-    }
-    
-    {
-      ParsedHtmlNode spanNode = nodes.get(1);
-      assertNull(spanNode.getText());
-      assertNotNull(spanNode.getAttributes());
-      assertEquals(0, spanNode.getAttributes().size());
-      assertNotNull(spanNode.getChildren());
-      assertEquals(1, spanNode.getChildren().size());
-      assertEquals("more", spanNode.getChildren().get(0).getText());
-    }
-    
-    {
-      ParsedHtmlNode divNode = nodes.get(2);
-      assertNull(divNode.getText());
-      assertNotNull(divNode.getAttributes());
-      assertEquals(1, divNode.getAttributes().size());
-      assertEquals("id", divNode.getAttributes().get(0).getName());
-      assertEquals("foo", divNode.getAttributes().get(0).getValue());
-      assertNotNull(divNode.getChildren());
-      assertEquals(1, divNode.getChildren().size());
-      assertEquals("yet more", divNode.getChildren().get(0).getText());
-    }
-  }
-  
   // TODO: figure out to what extent it makes sense to test "invalid"
   // HTML, semi-structured HTML, and comment parsing
 
   // Different parsers return either null or an empty child list,
   // in particular because Caja is not a W3C-compliant parser.
-  private void assertNullOrEmpty(List l) {
-    if (l != null && !l.isEmpty()) {
-      assertTrue(true);
-    }
-    return;
+  private void assertNullOrEmpty(NodeList l) {
+    assertTrue(l == null || l.getLength() == 0);
   }
 }

Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/ParseTreeSerializerBenchmark.java Fri Oct 24 18:17:10 2008
@@ -17,26 +17,22 @@
  */
 package org.apache.shindig.gadgets.parse;
 
+import org.apache.commons.io.IOUtils;
+import org.apache.shindig.gadgets.GadgetException;
 import org.apache.shindig.gadgets.parse.caja.CajaHtmlParser;
 import org.apache.shindig.gadgets.parse.nekohtml.NekoHtmlParser;
-import org.apache.shindig.gadgets.GadgetException;
-
-import org.apache.commons.io.IOUtils;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
+import org.apache.xml.serialize.HTMLSerializer;
+import org.cyberneko.html.parsers.SAXParser;
 import org.w3c.dom.Node;
 import org.w3c.dom.bootstrap.DOMImplementationRegistry;
 import org.w3c.dom.ls.*;
 
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.ByteArrayInputStream;
-import java.util.List;
+import java.io.*;
 
 /**
- * Tests serialization and deserialization of parse trees.
+ * Benchmarks for HTML parsing and serialization
+ *
+ * NOTE - Uncomment DOM4J bits to test that.
  */
 public class ParseTreeSerializerBenchmark {
   private DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
@@ -44,6 +40,9 @@
   private String content;
   private GadgetHtmlParser cajaParser = new CajaHtmlParser(new ParseModule.HTMLDocumentProvider());
   private GadgetHtmlParser nekoParser = new NekoHtmlParser(new ParseModule.HTMLDocumentProvider());
+  private boolean warmup;
+  private SAXParser saxParser;
+  //private SAXReader saxReader;
 
   private ParseTreeSerializerBenchmark(String file, int numRuns) throws Exception {
     File inputFile = new File(file);
@@ -52,82 +51,146 @@
       System.exit(1);
     }
     content = new String(IOUtils.toByteArray(new FileInputStream(file)));
-    this.numRuns = numRuns;
 
-    System.out.println("Caja Parse------------------------");
-    run(cajaParser);
-    System.out.println("Neko Parse------------------------");
-    run(nekoParser);
+    saxParser = new SAXParser();
+    //saxParser.setFeature("http://cyberneko.org/html/features/scanner/script/strip-comment-delims",true);
+    saxParser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs",true);
+    //saxReader = new SAXReader(saxParser);
+    //saxReader.setValidation(false);
+
+    this.numRuns = 50;
+    warmup = true;
+    runCaja();
+    runNeko();
+    runLS();
+    Thread.sleep(10000L);
+    this.numRuns = 300; //numRuns;
+    warmup = false;
+    runCaja();
+    runNeko();
+    runLS();
   }
 
-  private void run(GadgetHtmlParser parser) throws Exception {
-
+  private void runCaja() throws Exception {
+    output("Caja-----------------");
     // Some warmup runs with wait. Enough iterations to trigger the JIT
     // Wait to allow it to swap execution paths etc...
-    timeParseDom(parser, false);
-    timeParseOld(parser, false);
-    runLSSerializationTiming(parser, false);
-    Thread.sleep(1000L);
-
-    //System.out.println("Press a key to continue");
-    //System.in.read();
-    //System.out.println("Continuing");
-
-    timeParseOld(parser, true);
-    timeParseDom(parser, true);
-    runLSSerializationTiming(parser, true);
-
-    /*
-    System.out.println("Serializing and deserializing results of Caja run (" +
-        nodes.size() + " top-level nodes, " + numRuns + " runs)\n");
+    timeParseDom(cajaParser);
+  }
 
+  private void runNeko() throws Exception {
+    output("Neko-----------------");
+    timeParseDom(nekoParser);
+    //timeParseDom4J();
+    //timeParseDom4JSerialize();
+    timeParseDomSerialize(nekoParser);
+  }
 
-    long serTime = 0, deserTime = 0;
-    for (int i = 0; i < numRuns; ++i) {
-      long serStart = System.currentTimeMillis();
-      byte[] ser = pts.serialize(nodes);
-      serTime += (System.currentTimeMillis() - serStart);
-      long deserStart = System.currentTimeMillis();
-      List<ParsedHtmlNode> outs = pts.deserialize(ser);
-      deserTime += (System.currentTimeMillis() - deserStart);
-      //checkListEquality(nodes, outs);
+  private void runLS() throws Exception {
+    output("LOAD/STORE-----------------");
+    runLSSerializationTiming(nekoParser);
+  }
+
+  private void output(String string) {
+    if (!warmup) {
+      System.out.println(string);
     }
-    */
-    
-    //System.out.println("Serialization [" + serTime + " ms total: "
-    //    + ((double)serTime)/numRuns + "ms/run]");
-    //System.out.println("Deserialization [" + deserTime + " ms total: "
-    //    + ((double)deserTime)/numRuns + "ms/run]");
   }
 
-  private void timeParseDom(GadgetHtmlParser parser, boolean output) throws GadgetException {
+  private void timeParseDom(GadgetHtmlParser parser) throws GadgetException {
     long parseStart = System.currentTimeMillis();
-    for (int i = 0; i < 10; ++i) {
+    for (int i = 0; i < numRuns; ++i) {
       parser.parseDom(content);
     }
     long parseMillis = System.currentTimeMillis() - parseStart;
 
-    if (output) {
-      System.out.println("Parsing W3C DOM [" + parseMillis + " ms total: " +
+    output("Parsing W3C DOM [" + parseMillis + " ms total: " +
           ((double)parseMillis)/numRuns + "ms/run]");
+  }
+
+  /*
+  private void timeParseDom4J() throws GadgetException {
+    try {
+      long parseStart = System.currentTimeMillis();
+      for (int i = 0; i < numRuns; ++i) {
+         saxReader.read(new InputSource(new StringReader(content)));
+      }
+      long parseMillis = System.currentTimeMillis() - parseStart;
+
+      output("Parsing DOM4J [" + parseMillis + " ms total: " +
+            ((double)parseMillis)/numRuns + "ms/run]");
+    } catch (Exception e) {
+      throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e);
+    }
+  }
+  */
+
+  /*
+  private void timeParseDom4JSerialize() throws GadgetException {
+    try {
+      Document document =  saxReader.read(new InputSource(new StringReader(content)));
+      OutputFormat format = OutputFormat.createCompactFormat();
+      format.setXHTML(false);
+
+      long parseStart = System.currentTimeMillis();
+      for (int i = 0; i < numRuns; ++i) {
+        StringWriter sw = new StringWriter((content.length() * 11) / 10);
+        HTMLWriter htmlWriter = new HTMLWriter(sw, format) {
+          protected void writeEntity(Entity entity) throws IOException {
+            writer.write("&");
+            writer.write(entity.getName());
+            writer.write(";");
+            lastOutputNodeType = org.dom4j.Node.ENTITY_REFERENCE_NODE;
+          }
+        };
+        //htmlWriter.setResolveEntityRefs(false);
+        htmlWriter.setEscapeText(false);
+        htmlWriter.write(document);
+      }
+      long parseMillis = System.currentTimeMillis() - parseStart;
+
+      output("Serializing DOM4J [" + parseMillis + " ms total: " +
+            ((double)parseMillis)/numRuns + "ms/run]");
+    } catch (Exception e) {
+      throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e);
+    }
+
+  }
+  */
+
+  private void timeParseDomSerialize(GadgetHtmlParser parser) throws GadgetException {
+    org.w3c.dom.Document document = parser.parseDom(content);
+
+    try {
+      long parseStart = System.currentTimeMillis();
+      for (int i = 0; i < numRuns; ++i) {
+        StringWriter sw = new StringWriter((content.length() * 11) / 10);
+        HTMLSerializer xercesSerializer = new HTMLSerializer(sw, new org.apache.xml.serialize.OutputFormat());
+        xercesSerializer.serialize(document);
+      }
+      long parseMillis = System.currentTimeMillis() - parseStart;
+
+      output("Serializing Xerces [" + parseMillis + " ms total: " +
+            ((double) parseMillis) / numRuns + "ms/run]");
+    } catch (Exception e) {
+      throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, e);
     }
   }
 
-  private void timeParseOld(GadgetHtmlParser parser, boolean output) throws GadgetException {
+  /*
+  private void timeParseOld(GadgetHtmlParser parser) throws GadgetException {
     long parseStart = System.currentTimeMillis();
-    List<ParsedHtmlNode> nodes;
     for (int i = 0; i < numRuns; ++i) {
-      nodes = parser.parse(content);
+      parser.parse(content);
     }
     long parseMillis = System.currentTimeMillis() - parseStart;
 
-    if (output) {
-      System.out.println("Parsing [" + parseMillis + " ms total: " +
+    output("Parsing [" + parseMillis + " ms total: " +
           ((double)parseMillis)/numRuns + "ms/run]");
-    }
   }
+  */
 
-  private void runLSSerializationTiming(GadgetHtmlParser parser, boolean outputResult) throws Exception {
+  private void runLSSerializationTiming(GadgetHtmlParser parser) throws Exception {
     Node n = parser.parseDom(content);
     DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS");
     ByteArrayOutputStream baos;
@@ -152,12 +215,10 @@
       //checkListEquality(nodes, outs);
     }
 
-    if (outputResult) {
-      System.out.println("LS Serialization [" + serTime + " ms total: "
+    output("LS Serialization [" + serTime + " ms total: "
           + ((double)serTime)/numRuns + "ms/run]");
-      System.out.println("LS Deserialization [" + deserTime + " ms total: "
+    output("LS Deserialization [" + deserTime + " ms total: "
           + ((double)deserTime)/numRuns + "ms/run]");
-    }
   }
 
   public static void main(String[] args) {

Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/FeatureBasedRewriterTestBase.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/FeatureBasedRewriterTestBase.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/FeatureBasedRewriterTestBase.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/FeatureBasedRewriterTestBase.java Fri Oct 24 18:17:10 2008
@@ -17,28 +17,23 @@
  */
 package org.apache.shindig.gadgets.rewrite;
 
-import static org.easymock.EasyMock.expect;
-import static org.easymock.classextension.EasyMock.replay;
-
+import junit.framework.TestCase;
 import org.apache.shindig.common.uri.Uri;
 import org.apache.shindig.gadgets.Gadget;
 import org.apache.shindig.gadgets.GadgetContext;
 import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
-import org.apache.shindig.gadgets.parse.ParsedHtmlNode;
 import org.apache.shindig.gadgets.spec.GadgetSpec;
-
-import junit.framework.TestCase;
-
+import static org.easymock.EasyMock.expect;
 import org.easymock.classextension.EasyMock;
+import static org.easymock.classextension.EasyMock.replay;
+import org.w3c.dom.Document;
 
 import java.net.URI;
-import java.util.Arrays;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Set;
 
 public abstract class FeatureBasedRewriterTestBase extends TestCase {
-  protected URI baseUri;
+  URI baseUri;
 
   @Override
   protected void setUp() throws Exception {
@@ -46,12 +41,12 @@
     baseUri = new URI("http://gadget.org/dir/gadget.xml");
   }
 
-  protected ContentRewriterFeature.Factory mockContentRewriterFeatureFactory(
+  ContentRewriterFeature.Factory mockContentRewriterFeatureFactory(
       ContentRewriterFeature feature) {
     return new MockRewriterFeatureFactory(feature);
   }
 
-  protected ContentRewriterFeature makeFeature(String... includedTags) {
+  ContentRewriterFeature makeFeature(String... includedTags) {
     ContentRewriterFeature rewriterFeature =
         EasyMock.createNiceMock(ContentRewriterFeature.class);
     Set<String> tags = new HashSet<String>();
@@ -65,11 +60,10 @@
     return rewriterFeature;
   }
 
-  protected String rewriteHelper(ContentRewriter rewriter, String s, ParsedHtmlNode[] p)
+  String rewriteHelper(ContentRewriter rewriter, String s, Document doc)
       throws Exception {
     GadgetHtmlParser parser = EasyMock.createNiceMock(GadgetHtmlParser.class);
-    List<ParsedHtmlNode> expected = p != null ? Arrays.asList(p) : null;
-    expect(parser.parse(s)).andReturn(expected).anyTimes();
+    expect(parser.parseDom(s)).andReturn(doc).anyTimes();
 
     replay(parser);
 
@@ -90,9 +84,19 @@
         .setContext(context)
         .setSpec(spec);
     rewriter.rewrite(gadget, mc);
-    return mc.getContent();
+
+    String rewrittenContent = mc.getContent();
+
+    // Strip the enclosing <HTML>...</HTML> wrapper for convenience
+    int htmlTagIndex = rewrittenContent.indexOf("<HTML>");
+    if (htmlTagIndex != -1) {
+      return rewrittenContent.substring(htmlTagIndex + 6,
+          rewrittenContent.lastIndexOf("</HTML>"));
+    }
+    return rewrittenContent;
   }
 
+
   private static class MockRewriterFeatureFactory extends ContentRewriterFeature.Factory {
     private final ContentRewriterFeature feature;
 

Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriterTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriterTest.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriterTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/JsTagConcatContentRewriterTest.java Fri Oct 24 18:17:10 2008
@@ -18,25 +18,28 @@
  */
 package org.apache.shindig.gadgets.rewrite;
 
-import static org.easymock.EasyMock.expect;
-import static org.easymock.classextension.EasyMock.replay;
-
+import com.google.inject.Guice;
+import com.google.inject.Injector;
 import org.apache.shindig.common.uri.Uri;
-import org.apache.shindig.gadgets.parse.GadgetHtmlNodeTest;
-import org.apache.shindig.gadgets.parse.ParsedHtmlNode;
+import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
+import org.apache.shindig.gadgets.parse.ParseModule;
 import org.apache.shindig.gadgets.spec.GadgetSpec;
-
+import static org.easymock.EasyMock.expect;
 import org.easymock.classextension.EasyMock;
+import static org.easymock.classextension.EasyMock.replay;
+import org.w3c.dom.Document;
 
 public class JsTagConcatContentRewriterTest extends FeatureBasedRewriterTestBase {
-  private ContentRewriterFeature jsFeature;
   private JsTagConcatContentRewriter rewriter;
   private String concatBase;
+  private GadgetHtmlParser htmlParser;
 
   @Override
   protected void setUp() throws Exception {
     super.setUp();
-    jsFeature = makeFeature("script");
+    ContentRewriterFeature jsFeature = makeFeature("script");
+    Injector injector = Guice.createInjector(new ParseModule());
+    htmlParser = injector.getInstance(GadgetHtmlParser.class);
     ContentRewriterFeature.Factory factory = mockContentRewriterFeatureFactory(jsFeature);
     rewriter = new JsTagConcatContentRewriter(factory, null);
     GadgetSpec spec = EasyMock.createNiceMock(GadgetSpec.class);
@@ -46,149 +49,101 @@
   }
 
   public void testJSMergePreserveNoExternal() throws Exception {
-    String s = "<script>\n"
+    String s = "<SCRIPT>\n"
         + "doSomething\n"
-        + "</script>";
-    ParsedHtmlNode[] scriptKids = {
-      GadgetHtmlNodeTest.makeParsedTextNode("\ndoSomething\n")
-    };
-    ParsedHtmlNode[] p = {
-      GadgetHtmlNodeTest.makeParsedTagNode("script", null, scriptKids)
-    };
-    assertEquals(s, rewriteHelper(rewriter, s, p));
+        + "</SCRIPT>";
+
+    Document document = htmlParser.parseDom(s);
+    String rewritten = rewriteHelper(rewriter, s, document);
+    assertEquals(s, rewritten);
   }
 
   public void testJSMergePreserveNoScript() throws Exception {
     String s
-        = "<html><div id=\"test\">ceci ne pas une script</div></html>";
-    String[][] attribs = { { "id", "test" } };
-    ParsedHtmlNode[] divKids = {
-      GadgetHtmlNodeTest.makeParsedTextNode("ceci ne pas une script")
-    };
-    ParsedHtmlNode[] htmlKids = {
-      GadgetHtmlNodeTest.makeParsedTagNode("div", attribs, divKids)
-    };
-    ParsedHtmlNode[] p = {
-      GadgetHtmlNodeTest.makeParsedTagNode("html", null, htmlKids)
-    };
-    assertEquals(s, rewriteHelper(rewriter, s, p));
+        = "<DIV id=\"test\">ceci ne pas une script</DIV>";
+    Document document = htmlParser.parseDom(s);
+    String rewritten = rewriteHelper(rewriter, s, document);
+    assertEquals(s, rewritten);
   }
 
   public void testJSMergePreserveWithComment() throws Exception {
-    String s = "<script>" +
+    String s = "<SCRIPT>" +
         "<!--\ndoSomething\n-->" +
-        "</script>";
-    ParsedHtmlNode[] scriptKids = {
-      GadgetHtmlNodeTest.makeParsedTextNode("<!--\ndoSomething\n-->")
-    };
-    ParsedHtmlNode[] p = {
-      GadgetHtmlNodeTest.makeParsedTagNode("script", null, scriptKids)
-    };
-    assertEquals(s, rewriteHelper(rewriter, s, p));
+        "</SCRIPT>";
+    Document document = htmlParser.parseDom(s);
+    String rewritten = rewriteHelper(rewriter, s, document);
+    assertEquals(s, rewritten);
   }
 
   public void testJSMergeSingleScriptReWrite() throws Exception {
-    String s = "<script src=\"http://a.b.com/1.js\"></script>";
-    String[][] attribs = { { "src", "http://a.b.com/1.js" } };
-    ParsedHtmlNode[] p = {
-      GadgetHtmlNodeTest.makeParsedTagNode("script", attribs, null)
-    };
-    String rewritten
-        = "<script src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js\"></script>";
-    assertEquals(rewritten, rewriteHelper(rewriter, s, p));
+    String s = "<SCRIPT src=\"http://a.b.com/1.js\"></SCRIPT>";
+    String expected = "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js\"></SCRIPT>";
+    Document document = htmlParser.parseDom(s);
+    String rewritten = rewriteHelper(rewriter, s, document);
+    assertEquals(expected, rewritten);
   }
 
   public void testJSMergeTwoScriptReWriteWithWhitespace() throws Exception {
-    String s = "<script src=\"http://a.b.com/1.js\"></script>\n"
-        + "<script src=\"http://a.b.com/2.js\"></script>";
-    String[][] attr1 = { { "src", "http://a.b.com/1.js" } };
-    String[][] attr2 = { { "src", "http://a.b.com/2.js" } };
-    ParsedHtmlNode[] p = {
-      GadgetHtmlNodeTest.makeParsedTagNode("script", attr1, null),
-      GadgetHtmlNodeTest.makeParsedTextNode("\n"),
-      GadgetHtmlNodeTest.makeParsedTagNode("script", attr2, null)
-    };
-    String rewritten
-        = "<script src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js&2=http%3A%2F%2Fa.b.com%2F2.js\"></script>";
-    assertEquals(rewritten, rewriteHelper(rewriter, s, p));
+    String s = "<SCRIPT src=\"http://a.b.com/1.js\"></SCRIPT>"
+        + "<SCRIPT src=\"http://a.b.com/2.js\"></SCRIPT>";
+    String expected
+        = "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js&2=http%3A%2F%2Fa.b.com%2F2.js\"></SCRIPT>";
+    Document document = htmlParser.parseDom(s);
+    String rewritten = rewriteHelper(rewriter, s, document);
+    assertEquals(expected, rewritten);
   }
 
   public void testJSMergeLeadAndTrailingScriptReWrite() throws Exception {
-    String s = "<script>\n"
+    String s = "<SCRIPT>\n"
         + "doSomething\n"
-        + "</script>"
-        + "<script src=\"http://a.b.com/1.js\"></script>"
-        + "<script src=\"http://a.b.com/2.js\"></script>"
-        + "<script>"
+        + "</SCRIPT>"
+        + "<SCRIPT src=\"http://a.b.com/1.js\"></SCRIPT>"
+        + "<SCRIPT src=\"http://a.b.com/2.js\"></SCRIPT>"
+        + "<SCRIPT>\n"
         + "doSomething\n"
-        + "</script>";
-    String[][] attr1 = { { "src", "http://a.b.com/1.js" } };
-    String[][] attr2 = { { "src", "http://a.b.com/2.js" } };
-    ParsedHtmlNode[] scriptKids = {
-      GadgetHtmlNodeTest.makeParsedTextNode("\ndoSomething\n")
-    };
-    ParsedHtmlNode[] p = {
-      GadgetHtmlNodeTest.makeParsedTagNode("script", null, scriptKids),
-      GadgetHtmlNodeTest.makeParsedTagNode("script", attr1, null),
-      GadgetHtmlNodeTest.makeParsedTagNode("script", attr2, null),
-      GadgetHtmlNodeTest.makeParsedTagNode("script", null, scriptKids)
-    };
-    String rewritten = "<script>\n"
+        + "</SCRIPT>";
+    String expected = "<SCRIPT>\n"
         + "doSomething\n"
-        + "</script>"
-        + "<script src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js&2=http%3A%2F%2Fa.b.com%2F2.js\"></script>"
-        + "<script>\n"
+        + "</SCRIPT>"
+        + "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js&2=http%3A%2F%2Fa.b.com%2F2.js\"></SCRIPT>"
+        + "<SCRIPT>\n"
         + "doSomething\n"
-        + "</script>";
-    assertEquals(rewritten, rewriteHelper(rewriter, s, p));
+        + "</SCRIPT>";
+    Document document = htmlParser.parseDom(s);
+    String rewritten = rewriteHelper(rewriter, s, document);
+    assertEquals(expected, rewritten);
   }
 
   public void testJSMergeInterspersed() throws Exception {
-    String s = "<script src=\"http://a.b.com/1.js\"></script>"
-        + "<script src=\"http://a.b.com/2.js\"></script>"
-        + "<script><!-- doSomething --></script>"
-        + "<script src=\"http://a.b.com/3.js\"></script>"
-        + "<script src=\"http://a.b.com/4.js\"></script>";
-    String[][] attr1 = { { "src", "http://a.b.com/1.js" } };
-    String[][] attr2 = { { "src", "http://a.b.com/2.js" } };
-    String[][] attr3 = { { "src", "http://a.b.com/3.js" } };
-    String[][] attr4 = { { "src", "http://a.b.com/4.js" } };
-    ParsedHtmlNode[] scriptKids = {
-      GadgetHtmlNodeTest.makeParsedTextNode("<!-- doSomething -->")
-    };
-    ParsedHtmlNode[] p = {
-      GadgetHtmlNodeTest.makeParsedTagNode("script", attr1, null),
-      GadgetHtmlNodeTest.makeParsedTagNode("script", attr2, null),
-      GadgetHtmlNodeTest.makeParsedTagNode("script", null, scriptKids),
-      GadgetHtmlNodeTest.makeParsedTagNode("script", attr3, null),
-      GadgetHtmlNodeTest.makeParsedTagNode("script", attr4, null)
-    };
-    String rewritten =
-        "<script src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js&2=http%3A%2F%2Fa.b.com%2F2.js\"></script>" +
-        "<script><!-- doSomething --></script>" +
-        "<script src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F3.js&2=http%3A%2F%2Fa.b.com%2F4.js\"></script>";
-    assertEquals(rewritten, rewriteHelper(rewriter, s, p));
+    String s = "<SCRIPT src=\"http://a.b.com/1.js\"></SCRIPT>"
+        + "<SCRIPT src=\"http://a.b.com/2.js\"></SCRIPT>"
+        + "<SCRIPT><!-- doSomething --></SCRIPT>"
+        + "<SCRIPT src=\"http://a.b.com/3.js\"></SCRIPT>"
+        + "<SCRIPT src=\"http://a.b.com/4.js\"></SCRIPT>";
+    String expected =
+        "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F1.js&2=http%3A%2F%2Fa.b.com%2F2.js\"></SCRIPT>" +
+        "<SCRIPT><!-- doSomething --></SCRIPT>" +
+        "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fa.b.com%2F3.js&2=http%3A%2F%2Fa.b.com%2F4.js\"></SCRIPT>";
+    Document document = htmlParser.parseDom(s);
+    String rewritten = rewriteHelper(rewriter, s, document);
+    assertEquals(expected, rewritten);
   }
 
   public void testJSMergeDerelativizeHostRelative() throws Exception {
-    String s = "<script src=\"/1.js\"></script>";
-    String[][] attr1 = { { "src", "/1.js" } };
-    ParsedHtmlNode[] p = {
-      GadgetHtmlNodeTest.makeParsedTagNode("script", attr1, null)
-    };
-    String rewritten
-        = "<script src=\"" + concatBase + "1=http%3A%2F%2Fgadget.org%2F1.js\"></script>";
-    assertEquals(rewritten, rewriteHelper(rewriter, s, p));
+    String s = "<SCRIPT src=\"/1.js\"></SCRIPT>";
+    String expected
+        = "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fgadget.org%2F1.js\"></SCRIPT>";
+    Document document = htmlParser.parseDom(s);
+    String rewritten = rewriteHelper(rewriter, s, document);
+    assertEquals(expected, rewritten);
   }
 
   public void testJSMergeDerelativizePathRelative() throws Exception {
-    String s = "<script src=\"1.js\"></script>";
-    String[][] attr1 = { { "src", "1.js" } };
-    ParsedHtmlNode[] p = {
-      GadgetHtmlNodeTest.makeParsedTagNode("script", attr1, null)
-    };
-    String rewritten
-        = "<script src=\"" + concatBase + "1=http%3A%2F%2Fgadget.org%2Fdir%2F1.js\"></script>";
-    assertEquals(rewritten, rewriteHelper(rewriter, s, p));
+    String s = "<SCRIPT src=\"1.js\"></SCRIPT>";
+    String expected
+        = "<SCRIPT src=\"" + concatBase + "1=http%3A%2F%2Fgadget.org%2Fdir%2F1.js\"></SCRIPT>";
+    Document document = htmlParser.parseDom(s);
+    String rewritten = rewriteHelper(rewriter, s, document);
+    assertEquals(expected, rewritten);
   }
 }

Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriterTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriterTest.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriterTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/LinkingTagContentRewriterTest.java Fri Oct 24 18:17:10 2008
@@ -18,21 +18,26 @@
  */
 package org.apache.shindig.gadgets.rewrite;
 
-import java.net.URI;
+import com.google.inject.Guice;
+import com.google.inject.Injector;
+import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
+import org.apache.shindig.gadgets.parse.ParseModule;
+import org.w3c.dom.Document;
 
-import org.apache.shindig.gadgets.parse.GadgetHtmlNodeTest;
-import org.apache.shindig.gadgets.parse.ParsedHtmlNode;
+import java.net.URI;
 
 public class LinkingTagContentRewriterTest extends FeatureBasedRewriterTestBase {
-  private LinkRewriter pfxLinkRewriter;
   private LinkingTagContentRewriter rewriter;
+  private GadgetHtmlParser htmlParser;
   
   private static final String LINK_PREFIX = "px-";
 
   @Override
   protected void setUp() throws Exception {
     super.setUp();
-    pfxLinkRewriter = new LinkRewriter() {
+    Injector injector = Guice.createInjector(new ParseModule());
+    htmlParser = injector.getInstance(GadgetHtmlParser.class);
+    LinkRewriter pfxLinkRewriter = new LinkRewriter() {
       public String rewrite(String uri, URI context) {
         // Just prefixes with LINK_PREFIX
         return LINK_PREFIX + uri;
@@ -42,41 +47,29 @@
   }
   
   public void testLinkingTagStandardRewrite() throws Exception {
-    String s = "<img src=\"http://a.b.com/img.gif\"></img>\n"
-        + "<IMG src=\"http://a.b.com/img2.gif\"/>\n"
-        + "<eMbeD src=\"http://a.b.com/some.mov\"/>\n"
+    String s = "<img src=\"http://a.b.com/img.gif\"></img>"
+        + "<IMG src=\"http://a.b.com/img2.gif\"/>"
+        + "<eMbeD src=\"http://a.b.com/some.mov\"/>"
         + "<link href=\"http://a.b.com/link.html\"></link>";
-    String[][] img1attrib = { { "src", "http://a.b.com/img.gif" } };
-    String[][] img2attrib = { { "src", "http://a.b.com/img2.gif" } };
-    String[][] emb1attrib = { { "src", "http://a.b.com/some.mov" } };
-    String[][] href1attr = { { "href", "http://a.b.com/link.html" } };
-    ParsedHtmlNode[] p = {
-        GadgetHtmlNodeTest.makeParsedTagNode("img", img1attrib, null),
-        GadgetHtmlNodeTest.makeParsedTextNode("\n"),
-        GadgetHtmlNodeTest.makeParsedTagNode("IMG", img2attrib, null),
-        GadgetHtmlNodeTest.makeParsedTextNode("\n"),
-        GadgetHtmlNodeTest.makeParsedTagNode("eMbeD", emb1attrib, null),
-        GadgetHtmlNodeTest.makeParsedTextNode("\n"),
-        GadgetHtmlNodeTest.makeParsedTagNode("link", href1attr, null)
-    };
-    String rewritten = "<img src=\"" + LINK_PREFIX + "http://a.b.com/img.gif\"/>\n"
-        + "<IMG src=\"" + LINK_PREFIX + "http://a.b.com/img2.gif\"/>\n"
-        + "<eMbeD src=\"" + LINK_PREFIX + "http://a.b.com/some.mov\"/>\n"
-        + "<link href=\"" + LINK_PREFIX + "http://a.b.com/link.html\"/>";
-    assertEquals(rewritten, rewriteHelper(rewriter, s, p));
+    String expected = "<IMG src=\"" + LINK_PREFIX + "http://a.b.com/img.gif\">"
+        + "<IMG src=\"" + LINK_PREFIX + "http://a.b.com/img2.gif\">"
+        + "<EMBED src=\"" + LINK_PREFIX + "http://a.b.com/some.mov\"></EMBED>"
+        + "<LINK href=\"" + LINK_PREFIX + "http://a.b.com/link.html\">";
+    Document document = htmlParser.parseDom(s);
+    String rewritten = rewriteHelper(rewriter, s, document);
+    assertEquals(expected, rewritten);
   }
   
   public void testLinkingTagIgnoredWithNoRewriter() throws Exception {
     String s = "<img src=\"http://a.b.com/img.gif\"></img>";
-    String[][] img1attrib = { { "src", "http://a.b.com/img.gif" } };
-    ParsedHtmlNode[] p = {
-        GadgetHtmlNodeTest.makeParsedTagNode("img", img1attrib, null),
-    };
-    assertEquals(s, rewriteHelper(new LinkingTagContentRewriter(null, null), s, p));
+    Document document = htmlParser.parseDom(s);
+    String rewritten = rewriteHelper(new LinkingTagContentRewriter(null, null), s, document);
+    assertEquals(s, rewritten);
   }
   
   public void testLinkingTagIgnoredWithBadParse() throws Exception {
     String s = "<img src=\"http://a.b.com/img.gif></img>";
-    assertEquals(s, rewriteHelper(rewriter, s, null));  // null = couldn't parse
+    String rewritten = rewriteHelper(rewriter, s, null);  // null = couldn't parse
+    assertEquals(s, rewritten);
   }
 }

Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java?rev=707804&r1=707803&r2=707804&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java Fri Oct 24 18:17:10 2008
@@ -18,17 +18,15 @@
  */
 package org.apache.shindig.gadgets.rewrite;
 
-import org.apache.shindig.gadgets.parse.GadgetHtmlNode;
+import com.google.inject.Guice;
+import com.google.inject.Injector;
+import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
 import org.apache.shindig.gadgets.parse.ParseModule;
-import org.apache.shindig.gadgets.parse.caja.CajaHtmlParser;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
+import static org.junit.Assert.*;
 import org.junit.Before;
 import org.junit.Test;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
 
 public class MutableContentTest {
   private MutableContent mhc;
@@ -38,7 +36,8 @@
     // Note dependency on CajaHtmlParser - this isn't particularly ideal but is
     // sufficient given that this test doesn't exercise the parser extensively at all,
     // instead focusing on the additional utility provided by MutableHtmlContent
-    mhc = new MutableContent(new CajaHtmlParser(new ParseModule.HTMLDocumentProvider()));
+    Injector injector = Guice.createInjector(new ParseModule());
+    mhc = new MutableContent(injector.getInstance(GadgetHtmlParser.class));
     mhc.setContent("DEFAULT VIEW");
   }
   
@@ -47,64 +46,69 @@
     String content = mhc.getContent();
     assertEquals("DEFAULT VIEW", content);
   
-    GadgetHtmlNode root = mhc.getParseTree();
-    assertEquals(1, root.getChildren().size());
-    assertTrue(root.getChildren().get(0).isText());
-    assertEquals(content, root.getChildren().get(0).getText());
+    Document document = mhc.getDocument();
+    assertEquals(1, document.getFirstChild().getChildNodes().getLength());
+    assertTrue(document.getFirstChild().getChildNodes().item(0).getNodeType() == Node.TEXT_NODE);
+    assertEquals(content, document.getFirstChild().getChildNodes().item(0).getTextContent());
   
     assertSame(content, mhc.getContent());
-    assertSame(root, mhc.getParseTree());
+    assertSame(document, mhc.getDocument());
   }
   
   @Test
   public void modifyContentReflectedInTree() throws Exception {
     mhc.setContent("NEW CONTENT");
-    GadgetHtmlNode root = mhc.getParseTree();
-    assertEquals(1, root.getChildren().size());
-    assertEquals("NEW CONTENT", root.getChildren().get(0).getText());
+    Document document = mhc.getDocument();
+    assertEquals(1, document.getChildNodes().getLength());
+    assertEquals("NEW CONTENT", document.getChildNodes().item(0).getTextContent());
   }
   
   @Test
   public void modifyTreeReflectedInContent() throws Exception {
-    GadgetHtmlNode root = mhc.getParseTree();
+    Document document = mhc.getDocument();
   
     // First child should be text node per other tests. Modify it.
-    root.getChildren().get(0).setText("FOO CONTENT");
-    assertEquals("FOO CONTENT", mhc.getContent());
-  
+    document.getFirstChild().getFirstChild().setTextContent("FOO CONTENT");
+    MutableContent.notifyEdit(document);
+    assertTrue(mhc.getContent().contains("FOO CONTENT"));
+
     // Do it again
-    root.getChildren().get(0).setText("BAR CONTENT");
-    assertEquals("BAR CONTENT", mhc.getContent());
+    document.getFirstChild().getFirstChild().setTextContent("BAR CONTENT");
+    MutableContent.notifyEdit(document);
+    assertTrue(mhc.getContent().contains("BAR CONTENT"));
   
     // GadgetHtmlNode hasn't changed because string hasn't changed
-    assertSame(root, mhc.getParseTree());
+    assertSame(document, mhc.getDocument());
   }
   
   @Test
   public void staleTreeEditsInvalidatedAfterContentSet() throws Exception {
-    GadgetHtmlNode firstRoot = mhc.getParseTree();
+    Document document = mhc.getDocument();
   
     // Re-set content
     mhc.setContent("INVALIDATING CONTENT");
   
     // Should still be able to obtain this.
-    GadgetHtmlNode secondRoot = mhc.getParseTree();
-    assertNotSame(firstRoot, secondRoot);
+    Document document2 = mhc.getDocument();
+    assertNotSame(document, document2);
   
     // Should be able to *obtain* first child node...
-    GadgetHtmlNode firstTextNode = firstRoot.getChildren().get(0);
+    Node firstTextNode = document.getFirstChild().getChildNodes().item(0);
     try {
       // ...but not edit it.
-      firstTextNode.setText("STALE-SET CONTENT");
+      firstTextNode.setTextContent("STALE-SET CONTENT");
+      MutableContent.notifyEdit(document);
       fail("Should not be able to modify stale parse tree");
     } catch (IllegalStateException e) {
       // Expected condition.
     }
   
-    assertEquals("INVALIDATING CONTENT", secondRoot.getChildren().get(0).getText());
+    assertEquals("INVALIDATING CONTENT",
+        document2.getFirstChild().getChildNodes().item(0).getTextContent());
   
     // For good measure, modify secondRoot and get content
-    secondRoot.getChildren().get(0).setText("NEW CONTENT");
-    assertEquals("NEW CONTENT", mhc.getContent());
+    document2.getFirstChild().getChildNodes().item(0).setTextContent("NEW CONTENT");
+    MutableContent.notifyEdit(document2);
+    assertTrue(mhc.getContent().contains("NEW CONTENT"));
   }
 }