You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@shindig.apache.org by ga...@apache.org on 2010/09/16 18:45:34 UTC
svn commit: r997836 - in /shindig/trunk/java: common/conf/
gadgets/src/main/java/org/apache/shindig/gadgets/parse/
gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/
gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/
Author: gagan
Date: Thu Sep 16 16:45:34 2010
New Revision: 997836
URL: http://svn.apache.org/viewvc?rev=997836&view=rev
Log:
Patch by gagan.goku. http://codereview.appspot.com/2006042/. Vanilla Caja html parser
Added:
shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParser.java (with props)
shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlSerializer.java (with props)
shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParserTest.java (with props)
Modified:
shindig/trunk/java/common/conf/shindig.properties
shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java
Modified: shindig/trunk/java/common/conf/shindig.properties
URL: http://svn.apache.org/viewvc/shindig/trunk/java/common/conf/shindig.properties?rev=997836&r1=997835&r2=997836&view=diff
==============================================================================
--- shindig/trunk/java/common/conf/shindig.properties (original)
+++ shindig/trunk/java/common/conf/shindig.properties Thu Sep 16 16:45:34 2010
@@ -147,3 +147,6 @@ shindig.json-rpc.result-field=result
# the one that threw the exception.
shindig.accelerate.remapInternalServerError=true
shindig.proxy.remapInternalServerError=true
+
+# Add debug data when using VanillaCajaHtmlParser.
+vanillaCajaParser.needsDebugData=true
Modified: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java?rev=997836&r1=997835&r2=997836&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java (original)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java Thu Sep 16 16:45:34 2010
@@ -138,7 +138,7 @@ public class DefaultHtmlSerializer imple
output.append("<!--").append(n.getNodeValue()).append("-->");
}
- private void outputDocType(DocumentType docType, Appendable output) throws IOException {
+ public static void outputDocType(DocumentType docType, Appendable output) throws IOException {
output.append("<!DOCTYPE ");
// Use this so name matches case for XHTML
output.append(docType.getOwnerDocument().getDocumentElement().getNodeName());
Added: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParser.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParser.java?rev=997836&view=auto
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParser.java (added)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParser.java Thu Sep 16 16:45:34 2010
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package org.apache.shindig.gadgets.parse.caja;
+
+import com.google.caja.lexer.*;
+import com.google.caja.parser.html.DomParser;
+import com.google.caja.reporting.MessageQueue;
+import com.google.caja.reporting.SimpleMessageQueue;
+import com.google.inject.Inject;
+import com.google.inject.name.Named;
+import org.apache.shindig.gadgets.GadgetException;
+import org.apache.shindig.gadgets.http.HttpResponse;
+import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
+import org.apache.shindig.gadgets.parse.HtmlSerialization;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+
+/**
+ * Simple html parser based on caja.
+ *
+ * <p>The source string is tokenized with Caja's {@code HtmlLexer}, wrapped in a
+ * {@code TokenQueue} and handed to a {@code DomParser} created with
+ * {@code asXml = false}. The parser is configured with the
+ * {@code DOMImplementation} given to the constructor (passed on to the
+ * superclass) and with the {@code needsDebugData} flag, which is injected
+ * under the name {@code vanillaCajaParser.needsDebugData}.
+ *
+ * <p>Behavior visible in this class:
+ * <ul>
+ * <li>{@code parseDom} delegates straight to {@code parseDomImpl}; no caching
+ * is done (see the TODO).</li>
+ * <li>{@code parseDomImpl} returns the owner document of the node produced by
+ * {@code DomParser.parseDocument()} after attaching a new
+ * {@code VanillaCajaHtmlSerializer} to it via
+ * {@code HtmlSerialization.attach}.</li>
+ * <li>A Caja {@code ParseException} is reported as a {@code GadgetException}
+ * with code {@code HTML_PARSE_ERROR}; a {@code NullPointerException} is
+ * reported as a {@code GadgetException} with code
+ * {@code INTERNAL_SERVER_ERROR}.</li>
+ * <li>{@code parseFragmentImpl} is not supported and always throws
+ * {@code UnsupportedOperationException}.</li>
+ * </ul>
+ */
+public class VanillaCajaHtmlParser extends GadgetHtmlParser {
+ private final boolean needsDebugData; // forwarded to DomParser.setNeedsDebugData()
+
+ @Inject
+ public VanillaCajaHtmlParser(DOMImplementation documentFactory,
+ @Named("vanillaCajaParser.needsDebugData")
+ boolean needsDebugData) {
+ super(documentFactory);
+ this.needsDebugData = needsDebugData;
+ }
+
+ @Override
+ public Document parseDom(String source) throws GadgetException {
+ // TODO: Add support for caching the DOM after evaluation.
+ return parseDomImpl(source);
+ }
+
+ private DomParser getDomParser(String source, final MessageQueue mq) throws ParseException {
+ InputSource is = InputSource.UNKNOWN; // same InputSource is used for the char producer and the token queue
+ HtmlLexer lexer = new HtmlLexer(CharProducer.Factory.fromString(source, is));
+ TokenQueue<HtmlTokenType> tokenQueue = new TokenQueue<HtmlTokenType>(
+ lexer, is);
+ DomParser parser = new DomParser(tokenQueue, /** asXml */ false, mq);
+
+ parser.setDomImpl(documentFactory); // documentFactory is not declared in this class; presumably inherited from GadgetHtmlParser
+ parser.setNeedsDebugData(needsDebugData);
+ return parser;
+ }
+
+ @Override
+ protected Document parseDomImpl(String source) throws GadgetException {
+ MessageQueue mq = new SimpleMessageQueue(); // fresh queue per parse; it is not inspected afterwards in this method
+ try {
+ DomParser parser = getDomParser(source, mq);
+ Document doc = parser.parseDocument().getOwnerDocument(); // callers receive the owner document of the parsed node
+
+ VanillaCajaHtmlSerializer serializer = new VanillaCajaHtmlSerializer();
+ HtmlSerialization.attach(doc, serializer, null); // meaning of the null third argument is defined by HtmlSerialization.attach (not visible here)
+ return doc;
+ } catch (ParseException e) {
+ // NOTE(review): only the Caja message text is kept; the ParseException itself is not passed on as a cause.
+ throw new GadgetException(GadgetException.Code.HTML_PARSE_ERROR,
+ e.getCajaMessage().toString(), HttpResponse.SC_INTERNAL_SERVER_ERROR);
+ } catch (NullPointerException e) {
+ // NOTE(review): presumably guards against a failure inside the Caja parser; confirm which call can throw this.
+ throw new GadgetException(GadgetException.Code.INTERNAL_SERVER_ERROR, e);
+ }
+ }
+
+ @Override
+ protected DocumentFragment parseFragmentImpl(String source)
+ throws GadgetException {
+ throw new UnsupportedOperationException("Use parseDom instead."); // fragment parsing is intentionally unsupported
+ }
+}
Propchange: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlSerializer.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlSerializer.java?rev=997836&view=auto
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlSerializer.java (added)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlSerializer.java Thu Sep 16 16:45:34 2010
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package org.apache.shindig.gadgets.parse.caja;
+
+import com.google.caja.parser.html.Nodes;
+import com.google.caja.render.Concatenator;
+import com.google.caja.reporting.RenderContext;
+import org.apache.shindig.gadgets.parse.DefaultHtmlSerializer;
+import org.apache.shindig.gadgets.parse.HtmlSerialization;
+import org.apache.shindig.gadgets.parse.HtmlSerializer;
+import org.w3c.dom.Document;
+
+import java.io.IOException;
+import java.io.StringWriter;
+
+/**
+ * Serializer for VanillaCajaHtmlParser.
+ *
+ * <p>{@code serialize} obtains a {@code StringWriter} from
+ * {@code HtmlSerialization.createWriter(doc)}, writes the doctype through
+ * {@code DefaultHtmlSerializer.outputDocType} when the document has one, then
+ * appends the result of Caja's {@code Nodes.render} for the document using a
+ * {@code RenderContext} switched to XML mode with {@code asXml()}.
+ *
+ * <p>Returns the accumulated writer contents, or {@code null} if an
+ * {@code IOException} is raised while writing.
+ */
+public class VanillaCajaHtmlSerializer implements HtmlSerializer {
+ public String serialize(Document doc) {
+ try {
+ StringWriter sw = HtmlSerialization.createWriter(doc);
+ if (doc.getDoctype() != null) { // doctype is emitted first, and only when present
+ DefaultHtmlSerializer.outputDocType(doc.getDoctype(), sw);
+ }
+ sw.append(Nodes.render(doc, new RenderContext(new Concatenator(sw, null)).asXml())); // NOTE(review): the Concatenator also targets sw; confirm render() does not write through it as well
+ return sw.toString();
+ } catch (IOException e) {
+ // NOTE(review): the failure is swallowed and reported only as a null result; callers must handle null.
+ return null;
+ }
+ }
+}
Propchange: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlSerializer.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParserTest.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParserTest.java?rev=997836&view=auto
==============================================================================
--- shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParserTest.java (added)
+++ shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParserTest.java Thu Sep 16 16:45:34 2010
@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package org.apache.shindig.gadgets.parse.caja;
+
+import org.apache.shindig.gadgets.GadgetException;
+import org.junit.Before;
+import org.junit.Test;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.bootstrap.DOMImplementationRegistry;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for VanillaCajaHtmlParser.
+ *
+ * <p>Each test parses an HTML string with {@code VanillaCajaHtmlParser} and,
+ * except for the empty-document case, compares the output of
+ * {@code VanillaCajaHtmlSerializer} against an expected string.
+ */
+public class VanillaCajaHtmlParserTest {
+ private VanillaCajaHtmlParser parser;
+ private VanillaCajaHtmlSerializer serializer;
+
+ /** Creates a parser with debug data enabled and a fresh serializer for each test. */
+ @Before
+ public void setUp() throws Exception {
+ DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
+ // Require the traversal API
+ DOMImplementation domImpl = registry.getDOMImplementation("XML 1.0 Traversal 2.0");
+ parser = new VanillaCajaHtmlParser(domImpl, true);
+ serializer = new VanillaCajaHtmlSerializer();
+ }
+
+ /** Parsing an empty string must be reported as a GadgetException. */
+ @Test
+ public void testEmptyDocument() throws Exception {
+ boolean exceptionCaught = false;
+ try {
+ parser.parseDom("");
+ } catch (GadgetException e) {
+ exceptionCaught = true;
+ }
+ assertTrue(exceptionCaught);
+ }
+
+ // Bad behavior by Caja DomParser. Bug to be raised with Caja team.
+ // Caja should not parse such javascript as html. Ideally it should throw an
+ // exception indicating non html content.
+ // TODO: Update test case when the issue is fixed.
+ @Test
+ public void testNonHtml() throws Exception {
+ String html = "var hello=\"world\";";
+ // The double quotes in the body text are expected back as character
+ // references. Writing them as bare quotes here would end the string literal
+ // early and the file would not compile.
+ // NOTE(review): the exact reference form (&#34; vs &quot;) is reconstructed;
+ // confirm against the serializer's actual output.
+ String expected = "<html><head></head><body>var hello=&#34;world&#34;;"
+ + "</body></html>";
+ assertEquals(expected, serializer.serialize(parser.parseDom(html)));
+ }
+
+ /** A document without a head element is expected to come back with an empty head. */
+ @Test
+ public void testNoHead() throws Exception {
+ String html = "<html><body><a href=\"hello\"></a></body></html>";
+ String expected = "<html><head></head><body><a href=\"hello\"></a>"
+ + "</body></html>";
+ assertEquals(expected, serializer.serialize(parser.parseDom(html)));
+ }
+
+ /** A well-formed document is expected to round-trip unchanged. */
+ @Test
+ public void testParseAndSerialize() throws Exception {
+ String html = "<html><head><script src=\"1.js\"></script></head>"
+ + "<body><a href=\"hello\"></a></body></html>";
+ String expected = "<html><head><script src=\"1.js\"></script></head>"
+ + "<body><a href=\"hello\"></a>"
+ + "</body></html>";
+ assertEquals(expected, serializer.serialize(parser.parseDom(html)));
+ }
+
+ /** A stray closing embed tag is expected to be dropped and embed serialized as an empty element. */
+ @Test
+ public void testUnbalanced() throws Exception {
+ String html = "<html><head><script src=\"1.js\"></script></head>"
+ + "<body><p><embed></p></embed></body></html>";
+ String expected = "<html><head><script src=\"1.js\"></script></head>"
+ + "<body><p><embed /></p>"
+ + "</body></html>";
+ assertEquals(expected, serializer.serialize(parser.parseDom(html)));
+ }
+
+ // Weird case of normalization. Chrome and Firefox do not seem to execute the
+ // script since there is no closing </script> tag. Hence Caja is consistent
+ // with modern browsers.
+ // The rest of the input after the unclosed script tag is expected to end up
+ // as the script's content, followed by the closing tags that get added.
+ @Test
+ public void testBadTagBalancing() throws Exception {
+ String html = "<html><head><script src=\"1.js\"></head>"
+ + "<body></body></html>";
+ String expected = "<html><head><script src=\"1.js\">"
+ + "</head><body></body></html>"
+ + "</script></head><body></body></html>";
+ assertEquals(expected, serializer.serialize(parser.parseDom(html)));
+ }
+}
Propchange: shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/parse/caja/VanillaCajaHtmlParserTest.java
------------------------------------------------------------------------------
svn:eol-style = native