You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@shindig.apache.org by ga...@apache.org on 2010/09/16 18:45:34 UTC

svn commit: r997836 - in /shindig/trunk/java: common/conf/ gadgets/src/main/java/org/apache/shindig/gadgets/parse/ gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/ gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/

Author: gagan
Date: Thu Sep 16 16:45:34 2010
New Revision: 997836

URL: http://svn.apache.org/viewvc?rev=997836&view=rev
Log:
Patch by gagan.goku. http://codereview.appspot.com/2006042/.  Vanilla Caja html parser

Added:
    shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParser.java   (with props)
    shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlSerializer.java   (with props)
    shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParserTest.java   (with props)
Modified:
    shindig/trunk/java/common/conf/shindig.properties
    shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java

Modified: shindig/trunk/java/common/conf/shindig.properties
URL: http://svn.apache.org/viewvc/shindig/trunk/java/common/conf/shindig.properties?rev=997836&r1=997835&r2=997836&view=diff
==============================================================================
--- shindig/trunk/java/common/conf/shindig.properties (original)
+++ shindig/trunk/java/common/conf/shindig.properties Thu Sep 16 16:45:34 2010
@@ -147,3 +147,6 @@ shindig.json-rpc.result-field=result
 # the one that threw the exception.
 shindig.accelerate.remapInternalServerError=true
 shindig.proxy.remapInternalServerError=true
+
+# Add debug data when using VanillaCajaHtmlParser.
+vanillaCajaParser.needsDebugData=true

Modified: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java?rev=997836&r1=997835&r2=997836&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java (original)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java Thu Sep 16 16:45:34 2010
@@ -138,7 +138,7 @@ public class DefaultHtmlSerializer imple
     output.append("<!--").append(n.getNodeValue()).append("-->");
   }
 
-  private void outputDocType(DocumentType docType, Appendable output) throws IOException {
+  public static void outputDocType(DocumentType docType, Appendable output) throws IOException {
     output.append("<!DOCTYPE ");
     // Use this so name matches case for XHTML
     output.append(docType.getOwnerDocument().getDocumentElement().getNodeName());

Added: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParser.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParser.java?rev=997836&view=auto
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParser.java (added)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParser.java Thu Sep 16 16:45:34 2010
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package org.apache.shindig.gadgets.parse.caja;
+
+import com.google.caja.lexer.*;
+import com.google.caja.parser.html.DomParser;
+import com.google.caja.reporting.MessageQueue;
+import com.google.caja.reporting.SimpleMessageQueue;
+import com.google.inject.Inject;
+import com.google.inject.name.Named;
+import org.apache.shindig.gadgets.GadgetException;
+import org.apache.shindig.gadgets.http.HttpResponse;
+import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
+import org.apache.shindig.gadgets.parse.HtmlSerialization;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+
+/**
+ * {@link GadgetHtmlParser} that builds whole-document DOMs with Caja's {@link DomParser}.
+ */
+public class VanillaCajaHtmlParser extends GadgetHtmlParser {
+  private final boolean needsDebugData;
+
+  @Inject
+  public VanillaCajaHtmlParser(DOMImplementation documentFactory,
+                               @Named("vanillaCajaParser.needsDebugData")
+                               boolean needsDebugData) {
+    super(documentFactory);
+    this.needsDebugData = needsDebugData;
+  }
+
+  /** Parses {@code source} afresh on each call. TODO: Add support for caching the DOM. */
+  @Override
+  public Document parseDom(String source) throws GadgetException {
+    return parseDomImpl(source);
+  }
+
+  /** Builds a Caja parser for {@code source} using this parser's DOM impl and debug flag. */
+  private DomParser getDomParser(String source, final MessageQueue mq) throws ParseException {
+    InputSource origin = InputSource.UNKNOWN;
+    CharProducer chars = CharProducer.Factory.fromString(source, origin);
+    TokenQueue<HtmlTokenType> tokens =
+        new TokenQueue<HtmlTokenType>(new HtmlLexer(chars), origin);
+
+    DomParser domParser = new DomParser(tokens, /* asXml */ false, mq);
+    domParser.setDomImpl(documentFactory);
+    domParser.setNeedsDebugData(needsDebugData);
+    return domParser;
+  }
+
+  /** Parses {@code source} and attaches a serializer; failures surface as GadgetException. */
+  @Override
+  protected Document parseDomImpl(String source) throws GadgetException {
+    MessageQueue messages = new SimpleMessageQueue();
+    try {
+      Document parsed = getDomParser(source, messages).parseDocument().getOwnerDocument();
+      HtmlSerialization.attach(parsed, new VanillaCajaHtmlSerializer(), null);
+      return parsed;
+    } catch (ParseException e) {
+      String detail = e.getCajaMessage().toString();
+      throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR, detail,
+          HttpResponse.SC_INTERNAL_SERVER_ERROR);
+    } catch (NullPointerException e) {
+      throw new GadgetException(GadgetException.Code.INTERNAL_SERVER_ERROR, e);
+    }
+  }
+
+  /** Fragment parsing is not supported by this parser; callers must use parseDom. */
+  @Override
+  protected DocumentFragment parseFragmentImpl(String source) throws GadgetException {
+    throw new UnsupportedOperationException("Use parseDom instead.");
+  }
+}

Propchange: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlSerializer.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlSerializer.java?rev=997836&view=auto
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlSerializer.java (added)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlSerializer.java Thu Sep 16 16:45:34 2010
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package org.apache.shindig.gadgets.parse.caja;
+
+import com.google.caja.parser.html.Nodes;
+import com.google.caja.render.Concatenator;
+import com.google.caja.reporting.RenderContext;
+import org.apache.shindig.gadgets.parse.DefaultHtmlSerializer;
+import org.apache.shindig.gadgets.parse.HtmlSerialization;
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
+import org.w3c.dom.Document;
+
+import java.io.IOException;
+import java.io.StringWriter;
+
+/**
+ * Serializer for VanillaCajaHtmlParser: doctype (if any), then Caja output; null on IOException.
+ */
+public class VanillaCajaHtmlSerializer implements HtmlSerializer {
+  public String serialize(Document doc) {
+    try {
+      StringWriter sw = HtmlSerialization.createWriter(doc);
+      if (doc.getDoctype() != null) {
+        DefaultHtmlSerializer.outputDocType(doc.getDoctype(), sw);
+      }
+      sw.append(Nodes.render(doc, new RenderContext(new Concatenator(sw, null)).asXml()));
+      return sw.toString();
+    } catch (IOException e) {
+      return null;
+    }
+  }
+}

Propchange: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlSerializer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParserTest.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParserTest.java?rev=997836&view=auto
==============================================================================
--- shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParserTest.java (added)
+++ shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParserTest.java Thu Sep 16 16:45:34 2010
@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package org.apache.shindig.gadgets.parse.caja;
+
+import org.apache.shindig.gadgets.GadgetException;
+import org.junit.Before;
+import org.junit.Test;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.bootstrap.DOMImplementationRegistry;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for VanillaCajaHtmlParser.
+ */
+public class VanillaCajaHtmlParserTest {
+  private VanillaCajaHtmlParser parser;
+  private VanillaCajaHtmlSerializer serializer;
+
+  @Before
+  public void setUp() throws Exception {
+    DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
+    // Require the traversal API
+    DOMImplementation domImpl = registry.getDOMImplementation("XML 1.0 Traversal 2.0");
+    parser = new VanillaCajaHtmlParser(domImpl, true);
+    serializer = new VanillaCajaHtmlSerializer();
+  }
+
+  /** Parses {@code html} and serializes the resulting document back to a string. */
+  private String roundTrip(String html) throws GadgetException {
+    return serializer.serialize(parser.parseDom(html));
+  }
+
+  // Empty input must be reported as a GadgetException; any other outcome fails the test.
+  @Test(expected = GadgetException.class)
+  public void testEmptyDocument() throws Exception {
+    parser.parseDom("");
+  }
+
+  // Bad behavior by Caja DomParser. Bug to be raised with Caja team.
+  // Caja should not parse such javascript as html. Ideally it should throw an
+  // exception indicating non-html content.
+  // TODO: Update test case when the issue is fixed.
+  @Test
+  public void testNonHtml() throws Exception {
+    String html = "var hello=\"world\";";
+    String expected = "<html><head></head><body>var hello=&#34;world&#34;;"
+                      + "</body></html>";
+    assertEquals(expected, roundTrip(html));
+  }
+
+  @Test
+  public void testNoHead() throws Exception {
+    String html = "<html><body><a href=\"hello\"></a></body></html>";
+    String expected = "<html><head></head><body><a href=\"hello\"></a>"
+                      + "</body></html>";
+    assertEquals(expected, roundTrip(html));
+  }
+
+  @Test
+  public void testParseAndSerialize() throws Exception {
+    String html = "<html><head><script src=\"1.js\"></script></head>"
+                  + "<body><a href=\"hello\"></a></body></html>";
+    String expected = "<html><head><script src=\"1.js\"></script></head>"
+                      + "<body><a href=\"hello\"></a>"
+                      + "</body></html>";
+    assertEquals(expected, roundTrip(html));
+  }
+
+  @Test
+  public void testUnbalanced() throws Exception {
+    String html = "<html><head><script src=\"1.js\"></script></head>"
+                  + "<body><p><embed></p></embed></body></html>";
+    String expected = "<html><head><script src=\"1.js\"></script></head>"
+                      + "<body><p><embed /></p>"
+                      + "</body></html>";
+    assertEquals(expected, roundTrip(html));
+  }
+
+  // Weird case of normalization. Chrome and Firefox do not seem to execute the
+  // script since there is no closing </script> tag. Hence Caja is consistent
+  // with modern browsers.
+  @Test
+  public void testBadTagBalancing() throws Exception {
+    String html = "<html><head><script src=\"1.js\"></head>"
+                  + "<body></body></html>";
+    String expected = "<html><head><script src=\"1.js\">"
+                      + "</head><body></body></html>"
+                      + "</script></head><body></body></html>";
+    assertEquals(expected, roundTrip(html));
+  }
+}

Propchange: shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParserTest.java
------------------------------------------------------------------------------
    svn:eol-style = native