You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by dm...@apache.org on 2013/12/28 23:44:42 UTC

svn commit: r1553957 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/sax/TextContentHandler.java tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java

Author: dmeikle
Date: Sat Dec 28 22:44:41 2013
New Revision: 1553957

URL: http://svn.apache.org/r1553957
Log:
TIKA-820: Added setDocumentLocator delegate call in TextContentHandler

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TextContentHandler.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TextContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TextContentHandler.java?rev=1553957&r1=1553956&r2=1553957&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TextContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TextContentHandler.java Sat Dec 28 22:44:41 2013
@@ -45,6 +45,11 @@ public class TextContentHandler extends 
     }
 
     @Override
+    public void setDocumentLocator(org.xml.sax.Locator locator) {
+	    delegate.setDocumentLocator(locator); // hand the locator on to the delegate instead of dropping it (TIKA-820)
+    }
+
+    @Override
     public void characters(char[] ch, int start, int length)
             throws SAXException {
         delegate.characters(ch, start, length);

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java?rev=1553957&r1=1553956&r2=1553957&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java Sat Dec 28 22:44:41 2013
@@ -44,12 +44,14 @@ import org.apache.tika.parser.ParseConte
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.LinkContentHandler;
 import org.apache.tika.sax.TeeContentHandler;
+import org.apache.tika.sax.TextContentHandler;
 import org.ccil.cowan.tagsoup.HTMLSchema;
 import org.ccil.cowan.tagsoup.Schema;
 import org.junit.Ignore;
 import org.junit.Test;
 import org.xml.sax.Attributes;
 import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
 
@@ -925,4 +927,73 @@ public class HtmlParserTest {
         assertEquals("\ttext\n\n", linkContentHandler.getLinks().get(0).getText());
     }
 
+    /**
+     * Test case for TIKA-820: checks that a Locator reaches the content handler when parsing
+     * HTML, by reading the line and column reported while a known text run is delivered.
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-820">TIKA-820</a>
+     */
+    @Test
+    public void testLocator() throws Exception {
+        final int line = 0; // index of the line number within textPosition
+        final int col = 1; // index of the column number within textPosition
+        final int[] textPosition = new int[2]; // written by the handler below; stays {0, 0} if the text is never matched
+	
+        new HtmlParser().parse(HtmlParserTest.class.getResourceAsStream("/test-documents/testHTML.html"),
+        	new ContentHandler(){
+                Locator locator;
+
+                public void setDocumentLocator(Locator locator) {
+                    this.locator = locator; // keep the locator so characters() can query it
+                }
+
+                public void startDocument() throws SAXException {
+                }
+
+                public void endDocument() throws SAXException {
+                }
+
+                public void startPrefixMapping(String prefix, String uri)
+                        throws SAXException {
+                }
+
+                public void endPrefixMapping(String prefix)
+                        throws SAXException {
+                }
+
+                public void startElement(String uri, String localName,
+                                         String qName, Attributes atts) throws SAXException {
+                }
+
+                public void endElement(String uri, String localName,
+                                       String qName) throws SAXException {
+                }
+
+                public void characters(char[] ch, int start, int length)
+                        throws SAXException {
+                    String text = new String(ch, start, length);
+                    if (text.equals("Test Indexation Html") && locator != null) { // only the target text, and only if a locator was supplied
+                        textPosition[line] = locator.getLineNumber();
+                        textPosition[col] = locator.getColumnNumber();
+                    }
+                }
+
+                public void ignorableWhitespace(char[] ch, int start,
+                                                int length) throws SAXException {
+                }
+
+                public void processingInstruction(String target, String data)
+                        throws SAXException {
+                }
+
+                public void skippedEntity(String name) throws SAXException {
+                }},
+                new Metadata(),
+                new ParseContext());
+
+        // The text occurs at line 24 (if lines start at 0) or 25 (if lines start at 1).
+        assertEquals(24, textPosition[line]);
+        // The reported column seems fuzzy, so only check that it is less than 10 away from 47.
+        assertTrue(Math.abs(textPosition[col]-47) < 10);
+    }
+    
 }