You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by fm...@apache.org on 2015/01/23 15:16:01 UTC

svn commit: r1654218 - in /sling/trunk/contrib/scripting/sightly/engine/src: main/java/org/apache/sling/scripting/sightly/impl/html/dom/ test/java/org/apache/sling/scripting/sightly/impl/html/ test/java/org/apache/sling/scripting/sightly/impl/html/dom/

Author: fmeschbe
Date: Fri Jan 23 14:16:00 2015
New Revision: 1654218

URL: http://svn.apache.org/r1654218
Log:
SLING-4346 Sightly: HtmlParser does not parse documents correctly when comments span across internal char buffer size

Apply patch by Vlad Bailescu (thanks a lot)

Added:
    sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/
    sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/
    sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParserTest.java   (with props)
Modified:
    sling/trunk/contrib/scripting/sightly/engine/src/main/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParser.java

Modified: sling/trunk/contrib/scripting/sightly/engine/src/main/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParser.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/scripting/sightly/engine/src/main/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParser.java?rev=1654218&r1=1654217&r2=1654218&view=diff
==============================================================================
--- sling/trunk/contrib/scripting/sightly/engine/src/main/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParser.java (original)
+++ sling/trunk/contrib/scripting/sightly/engine/src/main/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParser.java Fri Jan 23 14:16:00 2015
@@ -27,6 +27,8 @@ import java.io.Reader;
  */
 public final class HtmlParser {
 
+    private static int BUF_SIZE = 2048;
+
     /** Internal character buffer */
     private final CharArrayWriter buffer = new CharArrayWriter(256);
 
@@ -101,7 +103,7 @@ public final class HtmlParser {
     throws IOException {
         try {
             this.documentHandler.onStart();
-            final char[] readBuffer = new char[2048];
+            final char[] readBuffer = new char[BUF_SIZE];
             int readLen = 0;
             while ( (readLen = reader.read(readBuffer)) > 0 ) {
                 this.update(readBuffer, readLen);
@@ -163,7 +165,7 @@ public final class HtmlParser {
                         parseState = PARSE_STATE.COMMENT;
                         parseSubState = 0;
                         tagType = TT_NONE;
-                        flushBuffer();
+                        // keep the accumulated buffer
                     } else if (c == '"' || c == '\'') {
                         quoteChar = c;
                         prevParseState = parseState;
@@ -281,7 +283,7 @@ public final class HtmlParser {
                 case 4:
                     if (c == '>') {
                         parseState = PARSE_STATE.OUTSIDE;
-                        documentHandler.onComment(new String(buf, start, curr - start + 1));
+                        processComment(buf, start, curr - start + 1);
                         start = curr + 1;
                     } else {
                         parseSubState = 2;
@@ -398,7 +400,7 @@ public final class HtmlParser {
             }
         }
         if (start < end) {
-            if (tagType == TT_NONE) {
+            if (tagType == TT_NONE && parseState != PARSE_STATE.COMMENT) {
                 documentHandler.onCharacters(buf, start, end - start);
             } else {
                 buffer.write(buf, start, end - start);
@@ -439,6 +441,20 @@ public final class HtmlParser {
     }
 
     /**
+     * Process a comment from current and accumulated character data
+     *
+     * @param ch character data work buffer
+     * @param off start offset for current data
+     * @param len length of current data
+     * @throws IOException
+     */
+    private void processComment(char[] ch, int off, int len) throws IOException {
+        buffer.write(ch, off, len);
+        documentHandler.onComment(buffer.toString());
+        buffer.reset();
+    }
+
+    /**
      * Decompose a tag and feed it to the document handler.
      *
      * @param ch

Added: sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParserTest.java
URL: http://svn.apache.org/viewvc/sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParserTest.java?rev=1654218&view=auto
==============================================================================
--- sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParserTest.java (added)
+++ sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParserTest.java Fri Jan 23 14:16:00 2015
@@ -0,0 +1,188 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ******************************************************************************/
+package org.apache.sling.scripting.sightly.impl.html.dom;
+
+import java.io.StringReader;
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.sling.scripting.sightly.impl.html.dom.template.*;
+
+import org.junit.Test;
+import org.powermock.reflect.Whitebox;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class HtmlParserTest {
+
+    /**
+     * Pretty print a template node structure for debugging of failed tests
+     *
+     * @param indentation - indentation level (the method is used recursively)
+     * @param node - template node to print
+     */
+    private static void print(int indentation, TemplateNode node) {
+        if (node == null) {
+            return;
+        }
+
+        List<TemplateNode> children = null;
+        String name = "UNKNOWN";
+
+
+        if (node.getClass() == Template.class) {
+            Template template = (Template)node;
+            children = template.getChildren();
+            name = template.getName();
+        } else if (node.getClass() == TemplateElementNode.class) {
+            TemplateElementNode element = (TemplateElementNode)node;
+            children = element.getChildren();
+            name = "ELEMENT: " + element.getName();
+        } else if (node.getClass() == TemplateTextNode.class) {
+            name = "TEXT: " + ((TemplateTextNode)node).getText();
+        } else if (node.getClass() == TemplateCommentNode.class) {
+            name = "COMMENT: " + ((TemplateCommentNode)node).getText();
+        }
+
+        System.out.print(StringUtils.repeat("\t", indentation));
+        System.out.println(name.replace("\n","\\n").replace("\r", "\\r"));
+        if (children == null) {
+            return;
+        }
+        for (TemplateNode child : children) {
+            print(indentation + 1, child);
+        }
+    }
+
+    /**
+     * Assert helper to compare two template node structures
+     *
+     * @param reference - reference node structure
+     * @param parsed - parsed node structure
+     */
+    private static void assertSameStructure(TemplateNode reference, TemplateNode parsed) {
+        if (parsed == null || reference == null) {
+            assertTrue("Expecting both null", parsed == reference);
+            return;
+        }
+        assertEquals("Expecting same class", reference.getClass(), parsed.getClass());
+
+        List<TemplateNode> parsedChildren = null, referenceChildren = null;
+
+        if (parsed.getClass() == Template.class) {
+            Template parsedTemplate = (Template)parsed;
+            Template referenceTemplate = (Template)reference;
+            assertEquals("Expecting same name",
+                    referenceTemplate.getName(),
+                    parsedTemplate.getName());
+            parsedChildren = parsedTemplate.getChildren();
+            referenceChildren = referenceTemplate.getChildren();
+        } else if (parsed.getClass() == TemplateElementNode.class) {
+            TemplateElementNode parsedElement = (TemplateElementNode)parsed;
+            TemplateElementNode referenceElement = (TemplateElementNode)reference;
+            assertEquals("Expecting same name",
+                    referenceElement.getName(),
+                    parsedElement.getName());
+            parsedChildren = parsedElement.getChildren();
+            referenceChildren = referenceElement.getChildren();
+        } else if (parsed.getClass() == TemplateTextNode.class) {
+            assertEquals("Expecting same content",
+                    ((TemplateTextNode)reference).getText(),
+                    ((TemplateTextNode)parsed).getText());
+        } else if (parsed.getClass() == TemplateCommentNode.class) {
+            assertEquals("Expecting same content",
+                    ((TemplateCommentNode)reference).getText(),
+                    ((TemplateCommentNode)parsed).getText());
+        }
+
+        if (parsedChildren == null || referenceChildren == null) {
+            assertTrue("Expecting both children null", parsedChildren == referenceChildren);
+            return;
+        }
+
+        assertEquals("Expecting same number of children",
+                parsedChildren.size(),
+                referenceChildren.size());
+
+        for (int i = 0, n = parsedChildren.size(); i < n; i++) {
+            assertSameStructure(parsedChildren.get(i), referenceChildren.get(i));
+        }
+    }
+
+    /**
+     * Create a basic template node structure containing one text node and one comment node
+     *
+     * @param textAndComment - String containing text (optional) and comment
+     * @return
+     */
+    private Template createReference(String textAndComment) {
+        int commentIx = textAndComment.indexOf("<!");
+        if (commentIx < 0 || commentIx > textAndComment.length()) {
+            throw new IndexOutOfBoundsException("String must contain text and comment");
+        }
+        Template reference = new Template();
+        if (commentIx > 0) {
+            reference.addChild(new TemplateTextNode(
+                    textAndComment.substring(0, commentIx)));
+        }
+        reference.addChild(new TemplateCommentNode(
+                textAndComment.substring(commentIx)));
+        return reference;
+    }
+
+    @Test
+    public void testParseCommentSpanningAcrossCharBuffer() throws Exception {
+        String[] testStrings = new String[] {
+                "<!--/* comment */-->",
+                "1<!--/* comment */-->",
+                "12<!--/* comment */-->",
+                "123<!--/* comment */-->",
+                "1234<!--/* comment */-->",
+                "12345<!--/* comment */-->",
+                "123456<!--/* comment */-->",
+                "1234567<!--/* comment */-->",
+                "12345678<!--/* comment */-->",
+                "123456789<!--/* comment */-->",
+                "1234567890<!--/* comment */-->",
+                "12345678901<!--/* comment */-->"
+        };
+        Template reference = null, parsed = null;
+        Whitebox.setInternalState(HtmlParser.class, "BUF_SIZE", 10);
+
+        try {
+            for (String test : testStrings) {
+                StringReader reader = new StringReader(test);
+                reference = createReference(test);
+
+                TemplateParser.TemplateParserContext context = new TemplateParser.TemplateParserContext();
+                HtmlParser.parse(reader, context);
+                parsed = context.getTemplate();
+
+                assertSameStructure(parsed, reference);
+            }
+        } catch (AssertionError e) {
+            System.out.println("Reference: ");
+            print(0, reference);
+            System.out.println("Parsed: ");
+            print(0, parsed);
+            throw e;
+        }
+    }
+}

Propchange: sling/trunk/contrib/scripting/sightly/engine/src/test/java/org/apache/sling/scripting/sightly/impl/html/dom/HtmlParserTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain