You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by dm...@apache.org on 2013/12/28 23:44:42 UTC
svn commit: r1553957 - in /tika/trunk:
tika-core/src/main/java/org/apache/tika/sax/TextContentHandler.java
tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
Author: dmeikle
Date: Sat Dec 28 22:44:41 2013
New Revision: 1553957
URL: http://svn.apache.org/r1553957
Log:
TIKA-820: Added setDocumentLocator delegate call in TextContentHandler
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TextContentHandler.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TextContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TextContentHandler.java?rev=1553957&r1=1553956&r2=1553957&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TextContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/TextContentHandler.java Sat Dec 28 22:44:41 2013
@@ -45,6 +45,11 @@ public class TextContentHandler extends
}
@Override
+ public void setDocumentLocator(org.xml.sax.Locator locator) {
+ delegate.setDocumentLocator(locator); // hand the Locator on to the wrapped handler instead of dropping it
+ }
+
+ @Override
public void characters(char[] ch, int start, int length)
throws SAXException {
delegate.characters(ch, start, length);
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java?rev=1553957&r1=1553956&r2=1553957&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java Sat Dec 28 22:44:41 2013
@@ -44,12 +44,14 @@ import org.apache.tika.parser.ParseConte
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.LinkContentHandler;
import org.apache.tika.sax.TeeContentHandler;
+import org.apache.tika.sax.TextContentHandler;
import org.ccil.cowan.tagsoup.HTMLSchema;
import org.ccil.cowan.tagsoup.Schema;
import org.junit.Ignore;
import org.junit.Test;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
@@ -925,4 +927,73 @@ public class HtmlParserTest {
assertEquals("\ttext\n\n", linkContentHandler.getLinks().get(0).getText());
}
+ /**
+ * Test case for TIKA-820: Locator is unset for HTML parser
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-820">TIKA-820</a>
+ */
+ @Test
+ public void testLocator() throws Exception {
+ final int line = 0; // index into textPosition holding the line number
+ final int col = 1; // index into textPosition holding the column number
+ final int[] textPosition = new int[2]; // array so the anonymous handler below can write into it
+
+ new HtmlParser().parse(HtmlParserTest.class.getResourceAsStream("/test-documents/testHTML.html"),
+ new ContentHandler(){
+ Locator locator; // stays null unless setDocumentLocator is called on this handler
+
+ public void setDocumentLocator(Locator locator) {
+ this.locator = locator;
+ }
+
+ public void startDocument() throws SAXException {
+ }
+
+ public void endDocument() throws SAXException {
+ }
+
+ public void startPrefixMapping(String prefix, String uri)
+ throws SAXException {
+ }
+
+ public void endPrefixMapping(String prefix)
+ throws SAXException {
+ }
+
+ public void startElement(String uri, String localName,
+ String qName, Attributes atts) throws SAXException {
+ }
+
+ public void endElement(String uri, String localName,
+ String qName) throws SAXException {
+ }
+
+ public void characters(char[] ch, int start, int length)
+ throws SAXException {
+ String text = new String(ch, start, length);
+ if (text.equals("Test Indexation Html") && locator != null) { // record the position only for this exact text run
+ textPosition[line] = locator.getLineNumber();
+ textPosition[col] = locator.getColumnNumber();
+ }
+ }
+
+ public void ignorableWhitespace(char[] ch, int start,
+ int length) throws SAXException {
+ }
+
+ public void processingInstruction(String target, String data)
+ throws SAXException {
+ }
+
+ public void skippedEntity(String name) throws SAXException {
+ }},
+ new Metadata(),
+ new ParseContext());
+
+ // The text occurs at line 24 (if lines start at 0) or 25 (if lines start at 1).
+ assertEquals(24, textPosition[line]); // textPosition stays {0, 0} if no locator was delivered, so this fails in that case
+ // The column reported seems fuzzy, just test it is close enough.
+ assertTrue(Math.abs(textPosition[col]-47) < 10); // i.e. accepts columns 38 through 56
+ }
+
}