You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by fm...@apache.org on 2015/01/23 15:16:01 UTC
svn commit: r1654218 - in /sling/trunk/contrib/scripting/sightly/engine/src:
main/java/org/apache/sling/scripting/sightly/impl/html/dom/
test/java/org/apache/sling/scripting/sightly/impl/html/
test/java/org/apache/sling/scripting/sightly/impl/html/dom/
Author: fmeschbe
Date: Fri Jan 23 14:16:00 2015
New Revision: 1654218
URL: http://svn.apache.org/r1654218
Log:
SLING-4346 Sightly: HtmlParser does not parse documents correctly when comments span across internal char buffer size
Apply patch by Vlad Bailescu (thanks a lot)
Added:
sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/
sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/
sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParserTest.java (with props)
Modified:
sling/trunk/contrib/scripting/sightly/engine/src/main/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParser.java
Modified: sling/trunk/contrib/scripting/sightly/engine/src/main/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParser.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/scripting/sightly/engine/src/main/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParser.java?rev=1654218&r1=1654217&r2=1654218&view=diff
==============================================================================
--- sling/trunk/contrib/scripting/sightly/engine/src/main/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParser.java (original)
+++ sling/trunk/contrib/scripting/sightly/engine/src/main/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParser.java Fri Jan 23 14:16:00 2015
@@ -27,6 +27,8 @@ import java.io.Reader;
*/
public final class HtmlParser {
+ private static int BUF_SIZE = 2048;
+
/** Internal character buffer */
private final CharArrayWriter buffer = new CharArrayWriter(256);
@@ -101,7 +103,7 @@ public final class HtmlParser {
throws IOException {
try {
this.documentHandler.onStart();
- final char[] readBuffer = new char[2048];
+ final char[] readBuffer = new char[BUF_SIZE];
int readLen = 0;
while ( (readLen = reader.read(readBuffer)) > 0 ) {
this.update(readBuffer, readLen);
@@ -163,7 +165,7 @@ public final class HtmlParser {
parseState = PARSE_STATE.COMMENT;
parseSubState = 0;
tagType = TT_NONE;
- flushBuffer();
+ // keep the accumulated buffer
} else if (c == '"' || c == '\'') {
quoteChar = c;
prevParseState = parseState;
@@ -281,7 +283,7 @@ public final class HtmlParser {
case 4:
if (c == '>') {
parseState = PARSE_STATE.OUTSIDE;
- documentHandler.onComment(new String(buf, start, curr - start + 1));
+ processComment(buf, start, curr - start + 1);
start = curr + 1;
} else {
parseSubState = 2;
@@ -398,7 +400,7 @@ public final class HtmlParser {
}
}
if (start < end) {
- if (tagType == TT_NONE) {
+ if (tagType == TT_NONE && parseState != PARSE_STATE.COMMENT) {
documentHandler.onCharacters(buf, start, end - start);
} else {
buffer.write(buf, start, end - start);
@@ -439,6 +441,20 @@ public final class HtmlParser {
}
/**
+ * Process a comment from current and accumulated character data
+ *
+ * @param ch character data work buffer
+ * @param off start offset for current data
+ * @param len length of current data
+ * @throws IOException
+ */
+ private void processComment(char[] ch, int off, int len) throws IOException {
+ buffer.write(ch, off, len);
+ documentHandler.onComment(buffer.toString());
+ buffer.reset();
+ }
+
+ /**
* Decompose a tag and feed it to the document handler.
*
* @param ch
Added: sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParserTest.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParserTest.java?rev=1654218&view=auto
==============================================================================
--- sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParserTest.java (added)
+++ sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParserTest.java Fri Jan 23 14:16:00 2015
@@ -0,0 +1,188 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+package org.apache.sling.scripting.sightly.impl.html.dom;
+
+import java.io.StringReader;
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.sling.scripting.sightly.impl.html.dom.template.*;
+
+import org.junit.Test;
+import org.powermock.reflect.Whitebox;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class HtmlParserTest {
+
+ /**
+ * Pretty print a template node structure for debugging of failed tests
+ *
+ * @param indentation - indentation level (the method is used recursively)
+ * @param node - template node to print
+ */
+ private static void print(int indentation, TemplateNode node) {
+ if (node == null) {
+ return;
+ }
+
+ List<TemplateNode> children = null;
+ String name = "UNKNOWN";
+
+
+ if (node.getClass() == Template.class) {
+ Template template = (Template)node;
+ children = template.getChildren();
+ name = template.getName();
+ } else if (node.getClass() == TemplateElementNode.class) {
+ TemplateElementNode element = (TemplateElementNode)node;
+ children = element.getChildren();
+ name = "ELEMENT: " + element.getName();
+ } else if (node.getClass() == TemplateTextNode.class) {
+ name = "TEXT: " + ((TemplateTextNode)node).getText();
+ } else if (node.getClass() == TemplateCommentNode.class) {
+ name = "COMMENT: " + ((TemplateCommentNode)node).getText();
+ }
+
+ System.out.print(StringUtils.repeat("\t", indentation));
+ System.out.println(name.replace("\n","\\n").replace("\r", "\\r"));
+ if (children == null) {
+ return;
+ }
+ for (TemplateNode child : children) {
+ print(indentation + 1, child);
+ }
+ }
+
+ /**
+ * Assert helper to compare two template node structures
+ *
+ * @param reference - reference node structure
+ * @param parsed - parsed node structure
+ */
+ private static void assertSameStructure(TemplateNode reference, TemplateNode parsed) {
+ if (parsed == null || reference == null) {
+ assertTrue("Expecting both null", parsed == reference);
+ return;
+ }
+ assertEquals("Expecting same class", reference.getClass(), parsed.getClass());
+
+ List<TemplateNode> parsedChildren = null, referenceChildren = null;
+
+ if (parsed.getClass() == Template.class) {
+ Template parsedTemplate = (Template)parsed;
+ Template referenceTemplate = (Template)reference;
+ assertEquals("Expecting same name",
+ referenceTemplate.getName(),
+ parsedTemplate.getName());
+ parsedChildren = parsedTemplate.getChildren();
+ referenceChildren = referenceTemplate.getChildren();
+ } else if (parsed.getClass() == TemplateElementNode.class) {
+ TemplateElementNode parsedElement = (TemplateElementNode)parsed;
+ TemplateElementNode referenceElement = (TemplateElementNode)reference;
+ assertEquals("Expecting same name",
+ referenceElement.getName(),
+ parsedElement.getName());
+ parsedChildren = parsedElement.getChildren();
+ referenceChildren = referenceElement.getChildren();
+ } else if (parsed.getClass() == TemplateTextNode.class) {
+ assertEquals("Expecting same content",
+ ((TemplateTextNode)reference).getText(),
+ ((TemplateTextNode)parsed).getText());
+ } else if (parsed.getClass() == TemplateCommentNode.class) {
+ assertEquals("Expecting same content",
+ ((TemplateCommentNode)reference).getText(),
+ ((TemplateCommentNode)parsed).getText());
+ }
+
+ if (parsedChildren == null || referenceChildren == null) {
+ assertTrue("Expecting both children null", parsedChildren == referenceChildren);
+ return;
+ }
+
+ assertEquals("Expecting same number of children",
+ parsedChildren.size(),
+ referenceChildren.size());
+
+ for (int i = 0, n = parsedChildren.size(); i < n; i++) {
+ assertSameStructure(parsedChildren.get(i), referenceChildren.get(i));
+ }
+ }
+
+ /**
+ * Create a basic template node structure containing one text node and one comment node
+ *
+ * @param textAndComment - String containing text (optional) and comment
+ * @return
+ */
+ private Template createReference(String textAndComment) {
+ int commentIx = textAndComment.indexOf("<!");
+ if (commentIx < 0 || commentIx > textAndComment.length()) {
+ throw new IndexOutOfBoundsException("String must contain text and comment");
+ }
+ Template reference = new Template();
+ if (commentIx > 0) {
+ reference.addChild(new TemplateTextNode(
+ textAndComment.substring(0, commentIx)));
+ }
+ reference.addChild(new TemplateCommentNode(
+ textAndComment.substring(commentIx)));
+ return reference;
+ }
+
+ @Test
+ public void testParseCommentSpanningAcrossCharBuffer() throws Exception {
+ String[] testStrings = new String[] {
+ "<!--/* comment */-->",
+ "1<!--/* comment */-->",
+ "12<!--/* comment */-->",
+ "123<!--/* comment */-->",
+ "1234<!--/* comment */-->",
+ "12345<!--/* comment */-->",
+ "123456<!--/* comment */-->",
+ "1234567<!--/* comment */-->",
+ "12345678<!--/* comment */-->",
+ "123456789<!--/* comment */-->",
+ "1234567890<!--/* comment */-->",
+ "12345678901<!--/* comment */-->"
+ };
+ Template reference = null, parsed = null;
+ Whitebox.setInternalState(HtmlParser.class, "BUF_SIZE", 10);
+
+ try {
+ for (String test : testStrings) {
+ StringReader reader = new StringReader(test);
+ reference = createReference(test);
+
+ TemplateParser.TemplateParserContext context = new TemplateParser.TemplateParserContext();
+ HtmlParser.parse(reader, context);
+ parsed = context.getTemplate();
+
+ assertSameStructure(parsed, reference);
+ }
+ } catch (AssertionError e) {
+ System.out.println("Reference: ");
+ print(0, reference);
+ System.out.println("Parsed: ");
+ print(0, parsed);
+ throw e;
+ }
+ }
+}
Propchange: sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParserTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain