You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/12/10 21:12:22 UTC

[tika] branch main updated: TIKA-3164 -- upgrade to POI 5.1.0 (#462)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 22261ab  TIKA-3164 -- upgrade to POI 5.1.0 (#462)
22261ab is described below

commit 22261ab09b2809847da87f24252dad2dfde81978
Author: Tim Allison <ta...@apache.org>
AuthorDate: Fri Dec 10 16:10:10 2021 -0500

    TIKA-3164 -- upgrade to POI 5.1.0 (#462)
    
    * TIKA-3164 -- First attempt -- do not merge
    
    * TIKA-3164 update POI to 5.1.0
---
 CHANGES.txt                                        |   4 +
 tika-parent/pom.xml                                |   4 +-
 .../detect/microsoft/ooxml/OPCPackageDetector.java |   3 +-
 .../tika/parser/microsoft/OutlookExtractor.java    |   7 +-
 .../microsoft/ooxml/AbstractOOXMLExtractor.java    |   4 -
 .../parser/microsoft/ooxml/MetadataExtractor.java  |   7 -
 .../microsoft/ooxml/OOXMLExtractorFactory.java     |  45 +--
 .../parser/microsoft/ooxml/OPCPackageWrapper.java  |  39 ++
 .../ooxml/SXSLFPowerPointExtractorDecorator.java   |   4 +-
 .../microsoft/ooxml/TikaXSSFSheetXMLHandler.java   | 411 +++++++++++++++++++++
 .../ooxml/XSLFPowerPointExtractorDecorator.java    |  18 +-
 .../ooxml/XSSFExcelExtractorDecorator.java         |   9 +-
 .../microsoft/ooxml/xps/XPSTextExtractor.java      |  24 +-
 .../xslf/XSLFEventBasedPowerPointExtractor.java    |  48 +--
 .../ooxml/xwpf/XWPFEventBasedWordExtractor.java    |  43 +--
 .../src/test/resources/log4j2.xml                  |  40 ++
 16 files changed, 605 insertions(+), 105 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 21f09c3..e797992 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,9 @@
 Release 2.2.0 - ???
 
+   * Upgrade to Apache POI 5.1.0. NOTE: This results in an increase
+     of several orders of magnitude in logging. See http://xyz for an example
+     of how to turn off logging for POI. (TIKA-3164).
+
    * Fix logic bug in PipesServer that prevented concatenation of
      content from attachments (TIKA-3609).
 
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 6700748..c3aae85 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -353,7 +353,7 @@
     <pax.exam.version>4.11.0</pax.exam.version>
     <pdfbox.version>2.0.24</pdfbox.version>
     <!-- NOTE: sync tukaani version with commons-compress in tika-parsers -->
-    <poi.version>4.1.2</poi.version>
+    <poi.version>5.1.0</poi.version>
     <quartz.version>2.3.2</quartz.version>
     <rome.version>1.16.0</rome.version>
     <scm.version>1.12.0</scm.version>
@@ -561,7 +561,7 @@
       <dependency>
         <groupId>javax.annotation</groupId>
         <artifactId>javax.annotation-api</artifactId>
-        <version>1.3.2</version>
+        <version>${javax.annotation.version}</version>
       </dependency>
       <dependency>
         <groupId>javax.xml.soap</groupId>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
index dd58d10..4e05078 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/detect/microsoft/ooxml/OPCPackageDetector.java
@@ -51,6 +51,7 @@ import org.apache.tika.detect.zip.ZipContainerDetector;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.microsoft.ooxml.OPCPackageWrapper;
 import org.apache.tika.sax.OfflineContentHandler;
 import org.apache.tika.sax.StoppingEarlyException;
 import org.apache.tika.utils.XMLReaderUtils;
@@ -259,7 +260,7 @@ public class OPCPackageDetector implements ZipContainerDetector {
         }
         ((CloseShieldZipFileZipEntrySource)zipEntrySource).allowClose();
         //only set the open container if we made it here
-        stream.setOpenContainer(pkg);
+        stream.setOpenContainer(new OPCPackageWrapper(pkg));
         return type;
     }
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index 2ee27da..fd884de 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -662,9 +662,10 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
 
         for (RecipientChunks chunks : recipientChunks) {
             Recipient r = new Recipient();
-            r.displayName = (chunks.recipientDisplayNameChunk != null) ?
-                    chunks.recipientDisplayNameChunk.toString() : null;
-            r.name = (chunks.recipientNameChunk != null) ? chunks.recipientNameChunk.toString() :
+            r.displayName = (chunks.getRecipientDisplayNameChunk() != null) ?
+                    chunks.getRecipientDisplayNameChunk().toString() : null;
+            r.name = (chunks.getRecipientNameChunk() != null) ?
+                    chunks.getRecipientNameChunk().toString() :
                     null;
             r.emailAddress = chunks.getRecipientEmailAddress();
             List<PropertyValue> vals = chunks.getProperties().get(MAPIProperty.RECIPIENT_TYPE);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
index e1a6598..2cdf4ff 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
@@ -30,7 +30,6 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.poi.ooxml.POIXMLDocument;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
 import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
@@ -94,9 +93,6 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
     private static final String TYPE_OLE_OBJECT =
             "application/vnd.openxmlformats-officedocument.oleObject";
 
-    static {
-        ExtractorFactory.setAllThreadsPreferEventExtractors(true);
-    }
 
     private final EmbeddedDocumentExtractor embeddedExtractor;
     private final ParseContext context;
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
index 09252e9..97efe3e 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
@@ -23,7 +23,6 @@ import java.util.Optional;
 import org.apache.poi.ooxml.POIXMLProperties;
 import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
-import org.apache.poi.openxml4j.util.Nullable;
 import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
 import org.apache.xmlbeans.impl.values.XmlValueOutOfRangeException;
 import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
@@ -270,12 +269,6 @@ public class MetadataExtractor {
         }
     }
 
-    private void setProperty(Metadata metadata, String name, Nullable<?> value) {
-        if (value.getValue() != null) {
-            setProperty(metadata, name, value.getValue().toString());
-        }
-    }
-
     private void setProperty(Metadata metadata, Property property, String value) {
         if (value != null) {
             metadata.set(property, value);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
index 9f8db62..ec15fed 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
@@ -25,7 +25,7 @@ import java.util.Locale;
 import org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException;
 import org.apache.commons.io.input.CloseShieldInputStream;
 import org.apache.poi.ooxml.POIXMLDocument;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
+import org.apache.poi.ooxml.extractor.POIXMLExtractorFactory;
 import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
@@ -34,10 +34,9 @@ import org.apache.poi.openxml4j.opc.PackageAccess;
 import org.apache.poi.openxml4j.opc.PackagePart;
 import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
 import org.apache.poi.util.LocaleUtil;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.extractor.XSLFExtractor;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xslf.usermodel.XSLFRelation;
-import org.apache.poi.xslf.usermodel.XSLFSlideShow;
 import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
 import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
 import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
@@ -74,11 +73,21 @@ public class OOXMLExtractorFactory {
 
     private static final Logger LOG = LoggerFactory.getLogger(OOXMLExtractorFactory.class);
     private static final int MAX_BUFFER_LENGTH = 1000000;
+    private static final POIXMLExtractorFactory EXTRACTOR_FACTORY = new POIXMLExtractorFactory();
+    // Local stand-in for XSLFPowerPointExtractor.SUPPORTED_TYPES, which this class used before.
+    //TODO find what happened to SUPPORTED_TYPES
+    private static final XSLFRelation[] XSLF_RELATIONS = new XSLFRelation[] {
+            XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE,
+            XSLFRelation.PRESENTATIONML,
+            XSLFRelation.PRESENTATIONML_TEMPLATE, XSLFRelation.PRESENTATION_MACRO
+    };
+    static {
+        POIXMLExtractorFactory.setAllThreadsPreferEventExtractors(true);
+    }
 
     public static void parse(InputStream stream, ContentHandler baseHandler, Metadata metadata,
                              ParseContext context) throws IOException, SAXException, TikaException {
         Locale locale = context.get(Locale.class, LocaleUtil.getUserLocale());
-        ExtractorFactory.setThreadPrefersEventExtractors(true);
 
         //if there's a problem opening the zip file;
         //create a tmp file, and copy what you can read of it.
@@ -90,8 +99,8 @@ public class OOXMLExtractorFactory {
 
             // Locate or Open the OPCPackage for the file
             TikaInputStream tis = TikaInputStream.cast(stream);
-            if (tis != null && tis.getOpenContainer() instanceof OPCPackage) {
-                pkg = (OPCPackage) tis.getOpenContainer();
+            if (tis != null && tis.getOpenContainer() instanceof OPCPackageWrapper) {
+                pkg = ((OPCPackageWrapper) tis.getOpenContainer()).getOPCPackage();
             } else if (tis != null && tis.hasFile()) {
                 try {
                     pkg = OPCPackage.open(tis.getFile().getPath(), PackageAccess.READ);
@@ -100,7 +109,7 @@ public class OOXMLExtractorFactory {
                     ZipSalvager.salvageCopy(tis.getFile(), tmpRepairedCopy);
                     pkg = OPCPackage.open(tmpRepairedCopy, PackageAccess.READ);
                 }
-                tis.setOpenContainer(pkg);
+                tis.setOpenContainer(new OPCPackageWrapper(pkg));
             } else {
                 //OPCPackage slurps rris into memory so we can close rris
                 //without apparent problems
@@ -186,7 +195,7 @@ public class OOXMLExtractorFactory {
             }
 
             if (poiExtractor == null) {
-                poiExtractor = (POIXMLTextExtractor) ExtractorFactory.createExtractor(pkg);
+                poiExtractor = EXTRACTOR_FACTORY.create(pkg);
             }
 
             POIXMLDocument document = poiExtractor.getDocument();
@@ -212,8 +221,8 @@ public class OOXMLExtractorFactory {
                                 " found. " +
                                 "The extractor returned was a " + poiExtractor);
             } else if (document instanceof XMLSlideShow) {
-                extractor = new XSLFPowerPointExtractorDecorator(context,
-                        (org.apache.poi.xslf.extractor.XSLFPowerPointExtractor) poiExtractor);
+                extractor = new XSLFPowerPointExtractorDecorator(metadata, context,
+                        (org.apache.poi.xslf.extractor.XSLFExtractor) poiExtractor);
             } else if (document instanceof XWPFDocument) {
                 extractor = new XWPFWordExtractorDecorator(metadata, context,
                         (XWPFWordExtractor) poiExtractor);
@@ -243,11 +252,7 @@ public class OOXMLExtractorFactory {
         } finally {
             if (tmpRepairedCopy != null) {
                 if (pkg != null) {
-                    try {
-                        pkg.close();
-                    } catch (IOException e) {
-                        LOG.warn("problem closing pkg file");
-                    }
+                    pkg.revert();
                 }
                 boolean deleted = tmpRepairedCopy.delete();
                 if (!deleted) {
@@ -302,15 +307,13 @@ public class OOXMLExtractorFactory {
         }
         String targetContentType = corePart.getContentType();
 
-        XSLFRelation[] xslfRelations =
-                org.apache.poi.xslf.extractor.XSLFPowerPointExtractor.SUPPORTED_TYPES;
-
-        for (XSLFRelation xslfRelation : xslfRelations) {
+        for (int i = 0; i < XSLF_RELATIONS.length; i++) {
+            XSLFRelation xslfRelation = XSLF_RELATIONS[i];
             if (xslfRelation.getContentType().equals(targetContentType)) {
                 if (eventBased) {
                     return new XSLFEventBasedPowerPointExtractor(pkg);
                 } else {
-                    return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
+                    return new XSLFExtractor(new XMLSlideShow(pkg));
                 }
             }
         }
@@ -319,7 +322,7 @@ public class OOXMLExtractorFactory {
             if (eventBased) {
                 return new XSLFEventBasedPowerPointExtractor(pkg);
             } else {
-                return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
+                return new XSLFExtractor(new XMLSlideShow(pkg));
             }
         }
         return null;
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OPCPackageWrapper.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OPCPackageWrapper.java
new file mode 100644
index 0000000..e58afa2
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OPCPackageWrapper.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.poi.openxml4j.opc.OPCPackage;
+
+public class OPCPackageWrapper implements Closeable { // Closeable holder around one OPCPackage
+    private final OPCPackage opcPackage;
+
+    public OPCPackageWrapper(OPCPackage opcPackage) {
+        this.opcPackage = opcPackage; // stored as-is; no null check is made
+    }
+
+    @Override
+    public void close() throws IOException {
+        opcPackage.revert(); // NOTE(review): presumably discards changes; confirm
+    }
+
+    public OPCPackage getOPCPackage() {
+        return opcPackage; // the same instance that was passed to the constructor
+    }
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
index 433804a..b24284a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
@@ -33,7 +33,7 @@ import org.apache.poi.openxml4j.opc.PackageRelationship;
 import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
 import org.apache.poi.openxml4j.opc.PackagingURIHelper;
 import org.apache.poi.openxml4j.opc.TargetMode;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.extractor.XSLFExtractor;
 import org.apache.poi.xslf.usermodel.XSLFRelation;
 import org.xml.sax.Attributes;
 import org.xml.sax.ContentHandler;
@@ -96,7 +96,7 @@ public class SXSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
     }
 
     /**
-     * @see XSLFPowerPointExtractor#getText()
+     * @see XSLFExtractor#getText()
      */
     protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, IOException {
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/TikaXSSFSheetXMLHandler.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/TikaXSSFSheetXMLHandler.java
new file mode 100644
index 0000000..ad2ac8e
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/TikaXSSFSheetXMLHandler.java
@@ -0,0 +1,411 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Queue;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.poi.ss.usermodel.BuiltinFormats;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.ss.usermodel.RichTextString;
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
+import org.apache.poi.xssf.model.Comments;
+import org.apache.poi.xssf.model.SharedStrings;
+import org.apache.poi.xssf.model.Styles;
+import org.apache.poi.xssf.usermodel.XSSFCellStyle;
+import org.apache.poi.xssf.usermodel.XSSFComment;
+import org.apache.poi.xssf.usermodel.XSSFRichTextString;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * This is a temporary workaround for POI 5.1.0: https://bz.apache.org/bugzilla/show_bug.cgi?id=65676
+ */
+public class TikaXSSFSheetXMLHandler extends DefaultHandler {
+    private static final Logger LOG = LogManager.getLogger(TikaXSSFSheetXMLHandler.class);
+    private Styles stylesTable;
+    private Comments comments;
+    private SharedStrings sharedStringsTable;
+    private final XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler;
+    private boolean vIsOpen;
+    private boolean fIsOpen;
+    private boolean isIsOpen;
+    private boolean hfIsOpen;
+    private xssfDataType nextDataType;
+    private short formatIndex;
+    private String formatString;
+    private final DataFormatter formatter;
+    private int rowNum;
+    private int nextRowNum;
+    private String cellRef;
+    private boolean formulasNotResults;
+    private StringBuilder value;
+    private StringBuilder formula;
+    private StringBuilder headerFooter;
+    private Queue<CellAddress> commentCellRefs;
+
+    public TikaXSSFSheetXMLHandler(Styles styles, Comments comments, SharedStrings strings,
+                                   XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler,
+                                   DataFormatter dataFormatter, boolean formulasNotResults) {
+        this.value = new StringBuilder(64); // the three text buffers start with capacity 64
+        this.formula = new StringBuilder(64);
+        this.headerFooter = new StringBuilder(64);
+        this.stylesTable = styles;
+        this.comments = comments;
+        this.sharedStringsTable = strings;
+        this.sheetContentsHandler = sheetContentsHandler;
+        this.formulasNotResults = formulasNotResults;
+        this.nextDataType = xssfDataType.NUMBER; // same value each "c" element resets it to
+        this.formatter = dataFormatter;
+        this.init(comments); // queues the comment cell addresses when comments != null
+    }
+
+    public TikaXSSFSheetXMLHandler(Styles styles, SharedStrings strings,
+                                   XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler,
+                                   DataFormatter dataFormatter, boolean formulasNotResults) {
+        this(styles, (Comments) null, strings, sheetContentsHandler, dataFormatter,
+                formulasNotResults); // delegates with a null comments table
+    }
+
+    public TikaXSSFSheetXMLHandler(Styles styles, SharedStrings strings,
+                                   XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler,
+                                   boolean formulasNotResults) {
+        this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults); // fresh DataFormatter
+    }
+
+    private void init(Comments commentsTable) {
+        if (commentsTable != null) {
+            this.commentCellRefs = new LinkedList<>();
+            Iterator<CellAddress> iter = commentsTable.getCellAddresses();
+            // Copy every commented cell address into the queue; it stays null without a table.
+            while (iter.hasNext()) {
+                this.commentCellRefs.add(iter.next());
+            }
+        }
+    }
+
+    private boolean isTextTag(String name) {
+        // "v" and "inlineStr" always count as text tags; "t" counts only while
+        // an "is" element is open (isIsOpen is set by startElement/endElement).
+        boolean alwaysText = "v".equals(name) || "inlineStr".equals(name);
+        if (alwaysText) {
+            return true;
+        }
+        return "t".equals(name) && this.isIsOpen;
+    }
+
+    public void startElement(String uri, String localName, String qName, Attributes attributes)
+            throws SAXException {
+        if (uri == null || // only elements with no namespace or the spreadsheetml main namespace
+                uri.equals("http://schemas.openxmlformats.org/spreadsheetml/2006/main")) {
+            if (this.isTextTag(localName)) {
+                this.vIsOpen = true;
+                if (!this.isIsOpen) { // inside "is" the buffer keeps accumulating across "t" runs
+                    this.value.setLength(0);
+                }
+            } else if ("is".equals(localName)) {
+                this.isIsOpen = true;
+            } else {
+                String cellType;
+                String cellStyleStr;
+                if ("f".equals(localName)) {
+                    this.formula.setLength(0);
+                    if (this.nextDataType == xssfDataType.NUMBER) {
+                        this.nextDataType = xssfDataType.FORMULA;
+                    }
+                    // Formula text is captured unless this is a shared formula without "ref".
+                    cellType = attributes.getValue("t");
+                    if (cellType != null && cellType.equals("shared")) {
+                        cellStyleStr = attributes.getValue("ref"); // variable reused for "ref"
+                        String si = attributes.getValue("si"); // NOTE(review): si is never read
+                        if (cellStyleStr != null) {
+                            this.fIsOpen = true;
+                        } else if (this.formulasNotResults) {
+                            LOG.atWarn().log("shared formulas not yet supported!");
+                        }
+                    } else {
+                        this.fIsOpen = true;
+                    }
+                } else if (!"oddHeader".equals(localName) && !"evenHeader".equals(localName) &&
+                        !"firstHeader".equals(localName) && !"firstFooter".equals(localName) &&
+                        !"oddFooter".equals(localName) && !"evenFooter".equals(localName)) {
+                    if ("row".equals(localName)) {
+                        cellType = attributes.getValue("r"); // variable reused for the row's "r"
+                        if (cellType != null) {
+                            this.rowNum = Integer.parseInt(cellType) - 1;
+                        } else {
+                            this.rowNum = this.nextRowNum;
+                        }
+                        // Report the row using the number derived above ("r" minus one, or nextRowNum).
+                        this.sheetContentsHandler.startRow(this.rowNum);
+                    } else if ("c".equals(localName)) {
+                        this.nextDataType = xssfDataType.NUMBER; // reset per-cell state
+                        this.formatIndex = -1;
+                        this.formatString = null;
+                        this.cellRef = attributes.getValue("r");
+                        cellType = attributes.getValue("t");
+                        cellStyleStr = attributes.getValue("s");
+                        if ("b".equals(cellType)) {
+                            this.nextDataType = xssfDataType.BOOLEAN;
+                        } else if ("e".equals(cellType)) {
+                            this.nextDataType = xssfDataType.ERROR;
+                        } else if ("inlineStr".equals(cellType)) {
+                            this.nextDataType = xssfDataType.INLINE_STRING;
+                        } else if ("s".equals(cellType)) {
+                            this.nextDataType = xssfDataType.SST_STRING;
+                        } else if ("str".equals(cellType)) {
+                            this.nextDataType = xssfDataType.FORMULA;
+                        } else { // any other "t" value (or none): look up the number format
+                            XSSFCellStyle style = null;
+                            if (this.stylesTable != null) {
+                                if (cellStyleStr != null) {
+                                    int styleIndex = Integer.parseInt(cellStyleStr);
+                                    style = this.stylesTable.getStyleAt(styleIndex);
+                                } else if (this.stylesTable.getNumCellStyles() > 0) {
+                                    style = this.stylesTable.getStyleAt(0); // no "s": use style 0
+                                }
+                            }
+                            // Without a style, formatIndex stays -1 and formatString stays null.
+                            if (style != null) {
+                                this.formatIndex = style.getDataFormat();
+                                this.formatString = style.getDataFormatString();
+                                if (this.formatString == null) {
+                                    this.formatString =
+                                            BuiltinFormats.getBuiltinFormat(this.formatIndex);
+                                }
+                            }
+                        }
+                    }
+                } else { // one of the six header/footer elements
+                    this.hfIsOpen = true;
+                    this.headerFooter.setLength(0);
+                }
+            }
+
+        }
+    }
+
+    public void endElement(String uri, String localName, String qName) throws SAXException {
+        if (uri == null || // same namespace filter as startElement
+                uri.equals("http://schemas.openxmlformats.org/spreadsheetml/2006/main")) {
+            if (this.isTextTag(localName)) {
+                this.vIsOpen = false;
+                if (!this.isIsOpen) { // inside "is" the cell is emitted when "is" closes instead
+                    this.outputCell();
+                }
+            } else if ("f".equals(localName)) {
+                this.fIsOpen = false;
+            } else if ("is".equals(localName)) {
+                this.isIsOpen = false;
+                this.outputCell();
+                this.value.setLength(0);
+            } else if ("row".equals(localName)) {
+                this.checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW);
+                this.sheetContentsHandler.endRow(this.rowNum);
+                this.nextRowNum = this.rowNum + 1; // used when the next row has no "r" attribute
+            } else if ("sheetData".equals(localName)) {
+                this.checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA);
+                this.sheetContentsHandler.endSheet();
+            } else if (!"oddHeader".equals(localName) && !"evenHeader".equals(localName) &&
+                    !"firstHeader".equals(localName)) {
+                if ("oddFooter".equals(localName) || "evenFooter".equals(localName) ||
+                        "firstFooter".equals(localName)) {
+                    this.hfIsOpen = false; // footers are reported with the boolean flag false
+                    this.sheetContentsHandler.headerFooter(this.headerFooter.toString(), false,
+                            localName);
+                }
+            } else { // one of the three header elements
+                this.hfIsOpen = false; // headers are reported with the boolean flag true
+                this.sheetContentsHandler.headerFooter(this.headerFooter.toString(), true,
+                        localName);
+            }
+
+        }
+    }
+
+    /**
+     * Appends the received character data to every buffer that is currently open:
+     * the cell value, the formula text and/or the header/footer text.
+     *
+     * @param ch     character array holding the data
+     * @param start  offset of the first character to read
+     * @param length number of characters to read
+     * @throws SAXException declared by the handler contract
+     */
+    public void characters(char[] ch, int start, int length) throws SAXException {
+        if (vIsOpen) {
+            value.append(ch, start, length);
+        }
+        if (fIsOpen) {
+            formula.append(ch, start, length);
+        }
+        if (hfIsOpen) {
+            headerFooter.append(ch, start, length);
+        }
+    }
+
+    /**
+     * Turns the buffered raw content of the current cell into its display string
+     * according to {@code nextDataType}, flushes comments of empty cells that precede
+     * it, reports the cell (with its own comment, if any) to the
+     * {@code sheetContentsHandler} and finally clears the value buffer.
+     * <p>
+     * The reported value is {@code null} when a boolean cell has no content or when a
+     * shared-string index cannot be parsed.
+     */
+    private void outputCell() {
+        String thisStr = null;
+        switch (this.nextDataType) {
+            case BOOLEAN:
+                // An empty value has no first character; leave the cell value null
+                // rather than failing with StringIndexOutOfBoundsException.
+                if (this.value.length() > 0) {
+                    thisStr = this.value.charAt(0) == '0' ? "FALSE" : "TRUE";
+                }
+                break;
+            case ERROR:
+                thisStr = "ERROR:" + this.value;
+                break;
+            case FORMULA:
+                if (this.formulasNotResults) {
+                    thisStr = this.formula.toString();
+                } else {
+                    thisStr = this.formatRawNumber(this.value.toString());
+                }
+                break;
+            case INLINE_STRING:
+                XSSFRichTextString rtsi = new XSSFRichTextString(this.value.toString());
+                thisStr = rtsi.toString();
+                break;
+            case SST_STRING:
+                String sstIndex = this.value.toString();
+
+                try {
+                    int idx = Integer.parseInt(sstIndex);
+                    RichTextString rtss = this.sharedStringsTable.getItemAt(idx);
+                    thisStr = rtss.toString();
+                } catch (NumberFormatException e) {
+                    LOG.atError().withThrowable(e)
+                            .log("Failed to parse SST index '{}'", sstIndex);
+                }
+                break;
+            case NUMBER:
+                // Same tolerant path as formula results: a malformed number falls back
+                // to its raw text instead of aborting the parse.
+                thisStr = this.formatRawNumber(this.value.toString());
+                break;
+            default:
+                thisStr = "(TODO: Unexpected type: " + this.nextDataType + ")";
+        }
+
+        // emit comments of skipped (empty) cells before this one, and drop this cell's
+        // own address from the pending list
+        this.checkForEmptyCellComments(EmptyCellCommentsCheckType.CELL);
+        XSSFComment comment = this.comments != null ?
+                this.comments.findCellComment(new CellAddress(this.cellRef)) : null;
+        this.sheetContentsHandler.cell(this.cellRef, thisStr, comment);
+        this.value.setLength(0);
+    }
+
+    /**
+     * Applies the current cell format to a raw numeric string.
+     *
+     * @param raw the unformatted cell content
+     * @return the formatted number, or {@code raw} unchanged when no format string is
+     *         set, the content is empty, or it cannot be parsed as a double
+     */
+    private String formatRawNumber(String raw) {
+        if (this.formatString == null || raw.isEmpty()) {
+            return raw;
+        }
+        try {
+            return this.formatter.formatRawCellContents(Double.parseDouble(raw),
+                    this.formatIndex, this.formatString);
+        } catch (NumberFormatException e) {
+            // not numeric (for example a formula that yields text): keep the raw text
+            return raw;
+        }
+    }
+
+    /**
+     * Emits comments that belong to cells which have no entry in the sheet data.
+     * <p>
+     * Pending comment addresses are taken from the head of {@code commentCellRefs}.
+     * Nothing happens when that collection is {@code null} or empty.
+     * NOTE(review): the comparisons below presumably rely on the pending addresses
+     * being ordered by position in the sheet -- confirm where the queue is populated.
+     *
+     * @param type the point in the parse at which the check runs: after a cell, at the
+     *             end of a row, or at the end of the sheet data
+     * @throws IllegalStateException if no cell reference has been seen yet and the
+     *                               check is not an end-of-row check
+     */
+    private void checkForEmptyCellComments(EmptyCellCommentsCheckType type) {
+        if (this.commentCellRefs != null && !this.commentCellRefs.isEmpty()) {
+            if (type == EmptyCellCommentsCheckType.END_OF_SHEET_DATA) {
+                // end of sheet: everything still pending is emitted as an empty cell
+                while (!this.commentCellRefs.isEmpty()) {
+                    this.outputEmptyCellComment((CellAddress) this.commentCellRefs.remove());
+                }
+
+                return;
+            }
+
+            if (this.cellRef == null) {
+                if (type == EmptyCellCommentsCheckType.END_OF_ROW) {
+                    // emit pending comments for the current row only; stop at the
+                    // first pending address that lies in a different row
+                    while (!this.commentCellRefs.isEmpty()) {
+                        if (((CellAddress) this.commentCellRefs.peek()).getRow() != this.rowNum) {
+                            return;
+                        }
+
+                        this.outputEmptyCellComment((CellAddress) this.commentCellRefs.remove());
+                    }
+
+                    return;
+                }
+
+                throw new IllegalStateException(
+                        "Cell ref should be null only if there are only empty cells in the row; rowNum: " +
+                                this.rowNum);
+            }
+
+            // Repeat while a pending comment was emitted and more remain.
+            CellAddress nextCommentCellRef;
+            do {
+                // note: this local shadows the String field this.cellRef
+                CellAddress cellRef = new CellAddress(this.cellRef);
+                CellAddress peekCellRef = (CellAddress) this.commentCellRefs.peek();
+                if (type == EmptyCellCommentsCheckType.CELL && cellRef.equals(peekCellRef)) {
+                    // the pending comment belongs to the cell being output; only drop
+                    // it from the queue here, nothing is emitted by this method
+                    this.commentCellRefs.remove();
+                    return;
+                }
+
+                int comparison = peekCellRef.compareTo(cellRef);
+                if (comparison > 0 && type == EmptyCellCommentsCheckType.END_OF_ROW &&
+                        peekCellRef.getRow() <= this.rowNum) {
+                    // end of row: pending address sorts after the last seen cell and
+                    // is not in a later row
+                    nextCommentCellRef = (CellAddress) this.commentCellRefs.remove();
+                    this.outputEmptyCellComment(nextCommentCellRef);
+                } else if (comparison < 0 && type == EmptyCellCommentsCheckType.CELL &&
+                        peekCellRef.getRow() <= this.rowNum) {
+                    // cell check: pending address sorts before the current cell
+                    nextCommentCellRef = (CellAddress) this.commentCellRefs.remove();
+                    this.outputEmptyCellComment(nextCommentCellRef);
+                } else {
+                    // nothing to emit at this point; ends the loop
+                    nextCommentCellRef = null;
+                }
+            } while (nextCommentCellRef != null && !this.commentCellRefs.isEmpty());
+        }
+
+    }
+
+    private void outputEmptyCellComment(CellAddress cellRef) {
+        XSSFComment comment = this.comments.findCellComment(cellRef);
+        this.sheetContentsHandler.cell(cellRef.formatAsString(), (String) null, comment);
+    }
+
+    /**
+     * Callback interface for the content events produced while a sheet is parsed.
+     */
+    public interface SheetContentsHandler {
+        /**
+         * Signals the start of a row.
+         *
+         * @param var1 presumably the row number, mirroring {@link #endRow(int)} --
+         *             TODO confirm against the call site
+         */
+        void startRow(int var1);
+
+        /**
+         * Signals the end of a row.
+         *
+         * @param var1 the row number of the row that just ended
+         */
+        void endRow(int var1);
+
+        /**
+         * Reports one cell.
+         *
+         * @param var1 the cell reference string
+         * @param var2 the cell's display value; {@code null} for a cell that only
+         *             carries a comment
+         * @param var3 the comment attached to the cell, or {@code null}
+         */
+        void cell(String var1, String var2, XSSFComment var3);
+
+        /**
+         * Reports the text of a header or footer element. The default does nothing.
+         *
+         * @param text     the accumulated header/footer text
+         * @param isHeader {@code true} for a header element, {@code false} for a footer
+         * @param tagName  local name of the element that was closed
+         */
+        default void headerFooter(String text, boolean isHeader, String tagName) {
+        }
+
+        /**
+         * Signals that the sheet data element has ended. The default does nothing.
+         */
+        default void endSheet() {
+        }
+    }
+
+    private static enum EmptyCellCommentsCheckType {
+        CELL, END_OF_ROW, END_OF_SHEET_DATA;
+
+        private EmptyCellCommentsCheckType() {
+        }
+    }
+
+    static enum xssfDataType {
+        BOOLEAN, ERROR, FORMULA, INLINE_STRING, SST_STRING, NUMBER;
+
+        private xssfDataType() {
+        }
+    }
+}
+
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
index 7994046..8501307 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
@@ -31,7 +31,7 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
 import org.apache.poi.openxml4j.opc.PackagingURIHelper;
 import org.apache.poi.openxml4j.opc.TargetMode;
 import org.apache.poi.sl.usermodel.Placeholder;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.extractor.XSLFExtractor;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xslf.usermodel.XSLFComment;
 import org.apache.poi.xslf.usermodel.XSLFCommentAuthors;
@@ -74,26 +74,14 @@ public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
     private Metadata metadata;
 
     public XSLFPowerPointExtractorDecorator(Metadata metadata, ParseContext context,
-                                            XSLFPowerPointExtractor extractor) {
+                                            XSLFExtractor extractor) {
         super(context, extractor);
         this.metadata = metadata;
     }
 
-    /**
-     * use {@link XSLFPowerPointExtractorDecorator#XSLFPowerPointExtractorDecorator(Metadata,
-     * ParseContext, XSLFPowerPointExtractor)}
-     *
-     * @param context
-     * @param extractor
-     */
-    @Deprecated
-    public XSLFPowerPointExtractorDecorator(ParseContext context,
-                                            XSLFPowerPointExtractor extractor) {
-        this(new Metadata(), context, extractor);
-    }
 
     /**
-     * @see org.apache.poi.xslf.extractor.XSLFPowerPointExtractor#getText()
+     * @see org.apache.poi.xslf.extractor.XSLFExtractor#getText()
      */
     protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, IOException {
         XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument();
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
index 9d4949a..d06c6fe 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
@@ -43,10 +43,9 @@ import org.apache.poi.ss.usermodel.HeaderFooter;
 import org.apache.poi.ss.util.CellReference;
 import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
 import org.apache.poi.xssf.eventusermodel.XSSFReader;
-import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
 import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
 import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
-import org.apache.poi.xssf.model.CommentsTable;
+import org.apache.poi.xssf.model.Comments;
 import org.apache.poi.xssf.model.StylesTable;
 import org.apache.poi.xssf.usermodel.XSSFComment;
 import org.apache.poi.xssf.usermodel.XSSFDrawing;
@@ -159,7 +158,7 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor {
                 addDrawingHyperLinks(sheetPart);
                 sheetParts.add(sheetPart);
 
-                CommentsTable comments = iter.getSheetComments();
+                Comments comments = iter.getSheetComments();
 
                 // Start, and output the sheet name
                 xhtml.startElement("div");
@@ -344,13 +343,13 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor {
 
     }
 
-    public void processSheet(SheetContentsHandler sheetContentsExtractor, CommentsTable comments,
+    public void processSheet(SheetContentsHandler sheetContentsHandler, Comments comments,
                              StylesTable styles, ReadOnlySharedStringsTable strings,
                              InputStream sheetInputStream) throws IOException, SAXException {
         try {
 
             XSSFSheetInterestingPartsCapturer handler = new XSSFSheetInterestingPartsCapturer(
-                    new XSSFSheetXMLHandler(styles, comments, strings, sheetContentsExtractor,
+                    new TikaXSSFSheetXMLHandler(styles, comments, strings, sheetContentsHandler,
                             formatter, false));
             XMLReaderUtils.parseSAX(sheetInputStream, handler, parseContext);
             sheetInputStream.close();
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xps/XPSTextExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xps/XPSTextExtractor.java
index f49e6de..297290b 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xps/XPSTextExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xps/XPSTextExtractor.java
@@ -18,6 +18,7 @@
 package org.apache.tika.parser.microsoft.ooxml.xps;
 
 
+import java.io.Closeable;
 import java.io.IOException;
 
 import org.apache.poi.ooxml.POIXMLDocument;
@@ -32,13 +33,12 @@ import org.apache.xmlbeans.XmlException;
  * and keep the general framework similar to our other POI-integrated
  * extractors.
  */
-public class XPSTextExtractor extends POIXMLTextExtractor {
+public class XPSTextExtractor implements POIXMLTextExtractor {
 
     private final OPCPackage pkg;
     private final POIXMLProperties properties;
 
     public XPSTextExtractor(OPCPackage pkg) throws OpenXML4JException, XmlException, IOException {
-        super((POIXMLDocument) null);
         this.pkg = pkg;
         this.properties = new POIXMLProperties(pkg);
 
@@ -54,6 +54,21 @@ public class XPSTextExtractor extends POIXMLTextExtractor {
         return null;
     }
 
+    /**
+     * No-op: the requested flag is discarded, and {@link #isCloseFilesystem()} always
+     * returns {@code false}.
+     *
+     * @param b ignored
+     */
+    @Override
+    public void setCloseFilesystem(boolean b) {
+
+    }
+
+    /**
+     * @return always {@code false}; this extractor does not track a close-filesystem flag
+     */
+    @Override
+    public boolean isCloseFilesystem() {
+        return false;
+    }
+
+    /**
+     * @return always {@code null}; no filesystem object is exposed by this extractor
+     */
+    @Override
+    public Closeable getFilesystem() {
+        return null;
+    }
+
     public POIXMLProperties.CoreProperties getCoreProperties() {
         return this.properties.getCoreProperties();
     }
@@ -65,4 +80,9 @@ public class XPSTextExtractor extends POIXMLTextExtractor {
     public POIXMLProperties.CustomProperties getCustomProperties() {
         return this.properties.getCustomProperties();
     }
+
+    /**
+     * @return always {@code null}; this extractor is built from an {@link OPCPackage}
+     *         and holds no {@link POIXMLDocument}
+     */
+    @Override
+    public POIXMLDocument getDocument() {
+        return null;
+    }
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xslf/XSLFEventBasedPowerPointExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xslf/XSLFEventBasedPowerPointExtractor.java
index ff0fd9f..46ada51 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xslf/XSLFEventBasedPowerPointExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xslf/XSLFEventBasedPowerPointExtractor.java
@@ -17,6 +17,7 @@
 
 package org.apache.tika.parser.microsoft.ooxml.xslf;
 
+import java.io.Closeable;
 import java.io.IOException;
 import java.util.Date;
 
@@ -25,45 +26,24 @@ import org.apache.poi.ooxml.POIXMLProperties;
 import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackageAccess;
 import org.apache.xmlbeans.XmlException;
 
 import org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler;
 import org.apache.tika.parser.microsoft.ooxml.ParagraphProperties;
 import org.apache.tika.parser.microsoft.ooxml.RunProperties;
-import org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor;
 
-public class XSLFEventBasedPowerPointExtractor extends POIXMLTextExtractor {
+public class XSLFEventBasedPowerPointExtractor implements POIXMLTextExtractor {
 
 
     private OPCPackage container;
     private POIXMLProperties properties;
 
-    public XSLFEventBasedPowerPointExtractor(String path)
-            throws XmlException, OpenXML4JException, IOException {
-        this(OPCPackage.open(path, PackageAccess.READ));
-    }
-
     public XSLFEventBasedPowerPointExtractor(OPCPackage container)
             throws XmlException, OpenXML4JException, IOException {
-        super((POIXMLDocument) null);
         this.container = container;
         this.properties = new POIXMLProperties(container);
     }
 
-
-    public static void main(String[] args) throws Exception {
-        if (args.length < 1) {
-            System.err.println("Use:");
-            System.err.println("  XSLFEventBasedPowerPointExtractor <filename.pptx>");
-            System.exit(1);
-        }
-
-        XWPFEventBasedWordExtractor extractor = new XWPFEventBasedWordExtractor(args[0]);
-        System.out.println(extractor.getText());
-        extractor.close();
-    }
-
     public OPCPackage getPackage() {
         return this.container;
     }
@@ -80,6 +60,11 @@ public class XSLFEventBasedPowerPointExtractor extends POIXMLTextExtractor {
         return this.properties.getCustomProperties();
     }
 
+    /**
+     * @return always {@code null}; this extractor is built from an {@link OPCPackage}
+     *         and holds no {@link POIXMLDocument}
+     */
+    @Override
+    public POIXMLDocument getDocument() {
+        return null;
+    }
+
 
     @Override
     public String getText() {
@@ -87,6 +72,25 @@ public class XSLFEventBasedPowerPointExtractor extends POIXMLTextExtractor {
         return "";
     }
 
+    /**
+     * No-op: the requested flag is discarded, and {@link #isCloseFilesystem()} always
+     * returns {@code false}.
+     *
+     * @param b ignored
+     */
+    @Override
+    public void setCloseFilesystem(boolean b) {
+
+    }
+
+    /**
+     * @return always {@code false}; this extractor does not track a close-filesystem flag
+     */
+    @Override
+    public boolean isCloseFilesystem() {
+        return false;
+    }
+
+    /**
+     * @return always {@code null}; no filesystem object is exposed by this extractor
+     */
+    @Override
+    public Closeable getFilesystem() {
+        return null;
+    }
+
+    /**
+     * Closes this extractor by calling {@code revert()} on the package returned by
+     * {@link #getPackage()}.
+     * NOTE(review): per POI's OPCPackage documentation, revert() closes the package
+     * without saving changes -- confirm against the POI version in use.
+     *
+     * @throws IOException declared by the close contract
+     */
+    @Override
+    public void close() throws IOException {
+        getPackage().revert();
+    }
 
     private static class XSLFToTextContentHandler
             implements OOXMLWordAndPowerPointTextHandler.XWPFBodyContentsHandler {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java
index 9901eb9..ffc583c 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java
@@ -17,6 +17,7 @@
 
 package org.apache.tika.parser.microsoft.ooxml.xwpf;
 
+import java.io.Closeable;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Date;
@@ -33,7 +34,6 @@ import org.apache.poi.ooxml.util.SAXHelper;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.openxml4j.opc.PackageAccess;
 import org.apache.poi.openxml4j.opc.PackagePart;
 import org.apache.poi.openxml4j.opc.PackageRelationship;
 import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
@@ -58,38 +58,19 @@ import org.apache.tika.parser.microsoft.ooxml.XWPFListManager;
 /**
  * Experimental class that is based on POI's XSSFEventBasedExcelExtractor
  */
-public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
+public class XWPFEventBasedWordExtractor implements POIXMLTextExtractor {
 
     private static final Logger LOG = LoggerFactory.getLogger(XWPFEventBasedWordExtractor.class);
 
     private OPCPackage container;
     private POIXMLProperties properties;
 
-    public XWPFEventBasedWordExtractor(String path)
-            throws XmlException, OpenXML4JException, IOException {
-        this(OPCPackage.open(path, PackageAccess.READ));
-    }
-
     public XWPFEventBasedWordExtractor(OPCPackage container)
             throws XmlException, OpenXML4JException, IOException {
-        super((POIXMLDocument) null);
         this.container = container;
         this.properties = new POIXMLProperties(container);
     }
 
-
-    public static void main(String[] args) throws Exception {
-        if (args.length < 1) {
-            System.err.println("Use:");
-            System.err.println("  XWPFEventBasedWordExtractor <filename.xlsx>");
-            System.exit(1);
-        }
-
-        XWPFEventBasedWordExtractor extractor = new XWPFEventBasedWordExtractor(args[0]);
-        System.out.println(extractor.getText());
-        extractor.close();
-    }
-
     public OPCPackage getPackage() {
         return this.container;
     }
@@ -106,6 +87,11 @@ public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
         return this.properties.getCustomProperties();
     }
 
+    /**
+     * @return always {@code null}; this extractor is built from an {@link OPCPackage}
+     *         and holds no {@link POIXMLDocument}
+     */
+    @Override
+    public POIXMLDocument getDocument() {
+        return null;
+    }
+
 
     @Override
     public String getText() {
@@ -152,6 +138,21 @@ public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
         return sb.toString();
     }
 
+    /**
+     * No-op: the requested flag is discarded, and {@link #isCloseFilesystem()} always
+     * returns {@code false}.
+     *
+     * @param b ignored
+     */
+    @Override
+    public void setCloseFilesystem(boolean b) {
+
+    }
+
+    /**
+     * @return always {@code false}; this extractor does not track a close-filesystem flag
+     */
+    @Override
+    public boolean isCloseFilesystem() {
+        return false;
+    }
+
+    /**
+     * @return always {@code null}; no filesystem object is exposed by this extractor
+     */
+    @Override
+    public Closeable getFilesystem() {
+        return null;
+    }
+
 
     private void handleDocumentPart(PackagePart documentPart, StringBuilder sb)
             throws IOException, SAXException {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/log4j2.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/log4j2.xml
new file mode 100644
index 0000000..1e9327e
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/log4j2.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<Configuration status="WARN">
+  <Appenders>
+    <Console name="Console" target="SYSTEM_ERR">
+      <PatternLayout pattern="%-5p [%t] %d{HH:mm:ss,SSS} %c %m%n"/>
+    </Console>
+  </Appenders>
+  <Loggers>
+    <Root level="info">
+      <AppenderRef ref="Console"/>
+    </Root>
+    <!-- effectively turn off the logging for POI 5.x (ERROR only), but keep
+         org.apache.poi.util.XMLHelper logging at INFO -->
+    <Logger name="org.apache.poi.util.XMLHelper" level="INFO" additivity="false">
+      <AppenderRef ref="Console"/>
+    </Logger>
+    <Logger name="org.apache.poi" level="ERROR" additivity="false">
+      <AppenderRef ref="Console"/>
+    </Logger>
+  </Loggers>
+</Configuration>
\ No newline at end of file