You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2019/12/17 12:26:05 UTC
[tika] 06/07: TIKA-3016 -- fix OldExcelParser to work with the ToXMLHandler
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 242083967621d0c2dda2dd2fed89799153f9cb8f
Author: tallison <ta...@apache.org>
AuthorDate: Mon Dec 16 16:52:24 2019 -0500
TIKA-3016 -- fix OldExcelParser to work with the ToXMLHandler
---
.../java/org/apache/tika/parser/microsoft/OldExcelParser.java | 7 ++-----
.../java/org/apache/tika/parser/microsoft/OldExcelParserTest.java | 8 ++++++++
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java
index 446eea9..207c28d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java
@@ -55,10 +55,7 @@ public class OldExcelParser extends AbstractParser {
XHTMLContentHandler xhtml) throws TikaException, IOException, SAXException {
// Get the whole text, as a single string
String text = extractor.getText();
-
// Split and output
- xhtml.startDocument();
-
String line;
BufferedReader reader = new BufferedReader(new StringReader(text));
while ((line = reader.readLine()) != null) {
@@ -66,8 +63,6 @@ public class OldExcelParser extends AbstractParser {
xhtml.characters(line);
xhtml.endElement("p");
}
-
- xhtml.endDocument();
}
public Set<MediaType> getSupportedTypes(ParseContext context) {
@@ -92,6 +87,8 @@ public class OldExcelParser extends AbstractParser {
// Have the text extracted and given to our Content Handler
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+ xhtml.startDocument();
parse(extractor, xhtml);
+ xhtml.endDocument();
}
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java
index fcf601c..36c1dfe 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java
@@ -111,4 +111,12 @@ public class OldExcelParserTest extends TikaTest {
assertContains("<p>(1)</p>", xml);
assertContains("<p>5.0</p>", xml);
}
+
+
+ @Test
+ public void testToXMLInOldExcelParser() throws Exception {
+ String xml = getXML("testEXCEL_5.xls").xml;
+ assertContains("Written and saved in Microsoft Excel X for Mac Service Release 1",
+ xml);
+ }
}