You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by kk...@apache.org on 2012/08/09 23:57:16 UTC

svn commit: r1371506 - /tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java

Author: kkrugler
Date: Thu Aug  9 21:57:16 2012
New Revision: 1371506

URL: http://svn.apache.org/viewvc?rev=1371506&view=rev
Log:
TIKA-889: XHTMLContentHandler won't emit newline when html element matches ENDLINE set

Added a test case to validate the proper behavior. Trunk passes without changes, so marking the issue as "cannot reproduce".

Modified:
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java?rev=1371506&r1=1371505&r2=1371506&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java Thu Aug  9 21:57:16 2012
@@ -762,4 +762,26 @@ public class HtmlParserTest extends Test
         assertTrue(Pattern.matches("(?s).*<body/>.*$", result));
     }
     
+    /**
+     * Test case for TIKA-889 ("XHTMLContentHandler won't emit newline when html element
+     * matches ENDLINE set"): parses a one-item list and checks the extracted body text.
+     * 
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-889">TIKA-889</a>
+     */
+    public void testNewlineAndIndent() throws Exception {
+        final String html = "<html><head><title>Title</title></head>" +
+                "<body><ul><li>one</li></ul></body></html>";
+
+        BodyContentHandler handler = new BodyContentHandler();
+        new HtmlParser().parse(
+                new ByteArrayInputStream(html.getBytes("UTF-8")),
+                handler,  new Metadata(), new ParseContext());
+        
+        // Pattern.matches is a whole-string match: the text must be exactly <tab>, "one", newline, newline
+        String result = handler.toString();
+        
+        assertTrue(Pattern.matches("\tone\n\n", result));
+    }
+
+    
 }