You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2019/12/17 12:26:05 UTC

[tika] 06/07: TIKA-3016 -- fix OldExcelParser to work with the ToXMLHandler

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 242083967621d0c2dda2dd2fed89799153f9cb8f
Author: tallison <ta...@apache.org>
AuthorDate: Mon Dec 16 16:52:24 2019 -0500

    TIKA-3016 -- fix OldExcelParser to work with the ToXMLHandler
---
 .../java/org/apache/tika/parser/microsoft/OldExcelParser.java     | 7 ++-----
 .../java/org/apache/tika/parser/microsoft/OldExcelParserTest.java | 8 ++++++++
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java
index 446eea9..207c28d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OldExcelParser.java
@@ -55,10 +55,7 @@ public class OldExcelParser extends AbstractParser {
                                 XHTMLContentHandler xhtml) throws TikaException, IOException, SAXException {
         // Get the whole text, as a single string
         String text = extractor.getText();
-
         // Split and output
-        xhtml.startDocument();
-
         String line;
         BufferedReader reader = new BufferedReader(new StringReader(text));
         while ((line = reader.readLine()) != null) {
@@ -66,8 +63,6 @@ public class OldExcelParser extends AbstractParser {
             xhtml.characters(line);
             xhtml.endElement("p");
         }
-
-        xhtml.endDocument();
     }
 
     public Set<MediaType> getSupportedTypes(ParseContext context) {
@@ -92,6 +87,8 @@ public class OldExcelParser extends AbstractParser {
 
         // Have the text extracted and given to our Content Handler
         XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
         parse(extractor, xhtml);
+        xhtml.endDocument();
     }
 }
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java
index fcf601c..36c1dfe 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java
@@ -111,4 +111,12 @@ public class OldExcelParserTest extends TikaTest {
         assertContains("<p>(1)</p>", xml);
         assertContains("<p>5.0</p>", xml);
     }
+
+
+    @Test
+    public void testToXMLInOldExcelParser() throws Exception {
+        String xml = getXML("testEXCEL_5.xls").xml;
+        assertContains("Written and saved in Microsoft Excel X for Mac Service Release 1",
+                xml);
+    }
 }