You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/11/08 20:14:53 UTC

[tika] branch TIKA-3164 created (now 7497267)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-3164
in repository https://gitbox.apache.org/repos/asf/tika.git.


      at 7497267  TIKA-3164 -- First attempt -- do not merge

This branch includes the following new commits:

     new 7497267  TIKA-3164 -- First attempt -- do not merge

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[tika] 01/01: TIKA-3164 -- First attempt -- do not merge

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-3164
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 74972671eef2c20cdae21bcacdf3f6d19c7a8757
Author: tballison <ta...@apache.org>
AuthorDate: Mon Nov 8 15:14:45 2021 -0500

    TIKA-3164 -- First attempt -- do not merge
---
 tika-parent/pom.xml                                |  2 +-
 .../tika/parser/microsoft/OutlookExtractor.java    |  7 +++--
 .../microsoft/ooxml/AbstractOOXMLExtractor.java    |  4 ---
 .../parser/microsoft/ooxml/MetadataExtractor.java  |  7 -----
 .../microsoft/ooxml/OOXMLExtractorFactory.java     | 33 +++++++++++++---------
 .../ooxml/SXSLFPowerPointExtractorDecorator.java   |  4 +--
 .../ooxml/XSLFPowerPointExtractorDecorator.java    | 18 ++----------
 .../ooxml/XSSFExcelExtractorDecorator.java         |  6 ++--
 .../microsoft/ooxml/xps/XPSTextExtractor.java      | 24 ++++++++++++++--
 .../xslf/XSLFEventBasedPowerPointExtractor.java    | 24 ++++++++++++++--
 .../ooxml/xwpf/XWPFEventBasedWordExtractor.java    | 24 ++++++++++++++--
 11 files changed, 99 insertions(+), 54 deletions(-)

diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index ca04560..d0b673f 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -353,7 +353,7 @@
     <pax.exam.version>4.11.0</pax.exam.version>
     <pdfbox.version>2.0.24</pdfbox.version>
     <!-- NOTE: sync tukaani version with commons-compress in tika-parsers -->
-    <poi.version>4.1.2</poi.version>
+    <poi.version>5.1.0</poi.version>
     <quartz.version>2.3.2</quartz.version>
     <rome.version>1.16.0</rome.version>
     <scm.version>1.12.0</scm.version>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index 2ee27da..fd884de 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -662,9 +662,10 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
 
         for (RecipientChunks chunks : recipientChunks) {
             Recipient r = new Recipient();
-            r.displayName = (chunks.recipientDisplayNameChunk != null) ?
-                    chunks.recipientDisplayNameChunk.toString() : null;
-            r.name = (chunks.recipientNameChunk != null) ? chunks.recipientNameChunk.toString() :
+            r.displayName = (chunks.getRecipientDisplayNameChunk() != null) ?
+                    chunks.getRecipientDisplayNameChunk().toString() : null;
+            r.name = (chunks.getRecipientNameChunk() != null) ?
+                    chunks.getRecipientNameChunk().toString() :
                     null;
             r.emailAddress = chunks.getRecipientEmailAddress();
             List<PropertyValue> vals = chunks.getProperties().get(MAPIProperty.RECIPIENT_TYPE);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
index f23ae12..15bb7a6 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
@@ -30,7 +30,6 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.poi.ooxml.POIXMLDocument;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
 import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
@@ -94,9 +93,6 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
     private static final String TYPE_OLE_OBJECT =
             "application/vnd.openxmlformats-officedocument.oleObject";
 
-    static {
-        ExtractorFactory.setAllThreadsPreferEventExtractors(true);
-    }
 
     private final EmbeddedDocumentExtractor embeddedExtractor;
     private final ParseContext context;
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
index 09252e9..97efe3e 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
@@ -23,7 +23,6 @@ import java.util.Optional;
 import org.apache.poi.ooxml.POIXMLProperties;
 import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
-import org.apache.poi.openxml4j.util.Nullable;
 import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
 import org.apache.xmlbeans.impl.values.XmlValueOutOfRangeException;
 import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
@@ -270,12 +269,6 @@ public class MetadataExtractor {
         }
     }
 
-    private void setProperty(Metadata metadata, String name, Nullable<?> value) {
-        if (value.getValue() != null) {
-            setProperty(metadata, name, value.getValue().toString());
-        }
-    }
-
     private void setProperty(Metadata metadata, Property property, String value) {
         if (value != null) {
             metadata.set(property, value);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
index 9f8db62..c15b003 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
@@ -25,7 +25,7 @@ import java.util.Locale;
 import org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException;
 import org.apache.commons.io.input.CloseShieldInputStream;
 import org.apache.poi.ooxml.POIXMLDocument;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
+import org.apache.poi.ooxml.extractor.POIXMLExtractorFactory;
 import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
@@ -34,10 +34,9 @@ import org.apache.poi.openxml4j.opc.PackageAccess;
 import org.apache.poi.openxml4j.opc.PackagePart;
 import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
 import org.apache.poi.util.LocaleUtil;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.extractor.XSLFExtractor;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xslf.usermodel.XSLFRelation;
-import org.apache.poi.xslf.usermodel.XSLFSlideShow;
 import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
 import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
 import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
@@ -74,11 +73,21 @@ public class OOXMLExtractorFactory {
 
     private static final Logger LOG = LoggerFactory.getLogger(OOXMLExtractorFactory.class);
     private static final int MAX_BUFFER_LENGTH = 1000000;
+    private static POIXMLExtractorFactory EXTRACTOR_FACTORY = new POIXMLExtractorFactory();
+
+    //TODO find what happened to SUPPORTED_TYPES
+    private static XSLFRelation[] XSLF_RELATIONS = new XSLFRelation[] {
+            XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE,
+            XSLFRelation.PRESENTATIONML,
+            XSLFRelation.PRESENTATIONML_TEMPLATE, XSLFRelation.PRESENTATION_MACRO
+    };
+    static {
+        POIXMLExtractorFactory.setAllThreadsPreferEventExtractors(true);
+    }
 
     public static void parse(InputStream stream, ContentHandler baseHandler, Metadata metadata,
                              ParseContext context) throws IOException, SAXException, TikaException {
         Locale locale = context.get(Locale.class, LocaleUtil.getUserLocale());
-        ExtractorFactory.setThreadPrefersEventExtractors(true);
 
         //if there's a problem opening the zip file;
         //create a tmp file, and copy what you can read of it.
@@ -186,7 +195,7 @@ public class OOXMLExtractorFactory {
             }
 
             if (poiExtractor == null) {
-                poiExtractor = (POIXMLTextExtractor) ExtractorFactory.createExtractor(pkg);
+                poiExtractor = EXTRACTOR_FACTORY.create(pkg);
             }
 
             POIXMLDocument document = poiExtractor.getDocument();
@@ -212,8 +221,8 @@ public class OOXMLExtractorFactory {
                                 " found. " +
                                 "The extractor returned was a " + poiExtractor);
             } else if (document instanceof XMLSlideShow) {
-                extractor = new XSLFPowerPointExtractorDecorator(context,
-                        (org.apache.poi.xslf.extractor.XSLFPowerPointExtractor) poiExtractor);
+                extractor = new XSLFPowerPointExtractorDecorator(metadata, context,
+                        (org.apache.poi.xslf.extractor.XSLFExtractor) poiExtractor);
             } else if (document instanceof XWPFDocument) {
                 extractor = new XWPFWordExtractorDecorator(metadata, context,
                         (XWPFWordExtractor) poiExtractor);
@@ -302,15 +311,13 @@ public class OOXMLExtractorFactory {
         }
         String targetContentType = corePart.getContentType();
 
-        XSLFRelation[] xslfRelations =
-                org.apache.poi.xslf.extractor.XSLFPowerPointExtractor.SUPPORTED_TYPES;
-
-        for (XSLFRelation xslfRelation : xslfRelations) {
+        for (int i = 0; i < XSLF_RELATIONS.length; i++) {
+            XSLFRelation xslfRelation = XSLF_RELATIONS[i];
             if (xslfRelation.getContentType().equals(targetContentType)) {
                 if (eventBased) {
                     return new XSLFEventBasedPowerPointExtractor(pkg);
                 } else {
-                    return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
+                    return new XSLFExtractor(new XMLSlideShow(pkg));
                 }
             }
         }
@@ -319,7 +326,7 @@ public class OOXMLExtractorFactory {
             if (eventBased) {
                 return new XSLFEventBasedPowerPointExtractor(pkg);
             } else {
-                return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
+                return new XSLFExtractor(new XMLSlideShow(pkg));
             }
         }
         return null;
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
index 433804a..b24284a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/SXSLFPowerPointExtractorDecorator.java
@@ -33,7 +33,7 @@ import org.apache.poi.openxml4j.opc.PackageRelationship;
 import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
 import org.apache.poi.openxml4j.opc.PackagingURIHelper;
 import org.apache.poi.openxml4j.opc.TargetMode;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.extractor.XSLFExtractor;
 import org.apache.poi.xslf.usermodel.XSLFRelation;
 import org.xml.sax.Attributes;
 import org.xml.sax.ContentHandler;
@@ -96,7 +96,7 @@ public class SXSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
     }
 
     /**
-     * @see XSLFPowerPointExtractor#getText()
+     * @see XSLFExtractor#getText()
      */
     protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, IOException {
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
index 7994046..8501307 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
@@ -31,7 +31,7 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
 import org.apache.poi.openxml4j.opc.PackagingURIHelper;
 import org.apache.poi.openxml4j.opc.TargetMode;
 import org.apache.poi.sl.usermodel.Placeholder;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.extractor.XSLFExtractor;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xslf.usermodel.XSLFComment;
 import org.apache.poi.xslf.usermodel.XSLFCommentAuthors;
@@ -74,26 +74,14 @@ public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
     private Metadata metadata;
 
     public XSLFPowerPointExtractorDecorator(Metadata metadata, ParseContext context,
-                                            XSLFPowerPointExtractor extractor) {
+                                            XSLFExtractor extractor) {
         super(context, extractor);
         this.metadata = metadata;
     }
 
-    /**
-     * use {@link XSLFPowerPointExtractorDecorator#XSLFPowerPointExtractorDecorator(Metadata,
-     * ParseContext, XSLFPowerPointExtractor)}
-     *
-     * @param context
-     * @param extractor
-     */
-    @Deprecated
-    public XSLFPowerPointExtractorDecorator(ParseContext context,
-                                            XSLFPowerPointExtractor extractor) {
-        this(new Metadata(), context, extractor);
-    }
 
     /**
-     * @see org.apache.poi.xslf.extractor.XSLFPowerPointExtractor#getText()
+     * @see org.apache.poi.xslf.extractor.XSLFExtractor#getText()
      */
     protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException, IOException {
         XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument();
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
index 9d4949a..3c1b107 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
@@ -46,7 +46,7 @@ import org.apache.poi.xssf.eventusermodel.XSSFReader;
 import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
 import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
 import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
-import org.apache.poi.xssf.model.CommentsTable;
+import org.apache.poi.xssf.model.Comments;
 import org.apache.poi.xssf.model.StylesTable;
 import org.apache.poi.xssf.usermodel.XSSFComment;
 import org.apache.poi.xssf.usermodel.XSSFDrawing;
@@ -159,7 +159,7 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor {
                 addDrawingHyperLinks(sheetPart);
                 sheetParts.add(sheetPart);
 
-                CommentsTable comments = iter.getSheetComments();
+                Comments comments = iter.getSheetComments();
 
                 // Start, and output the sheet name
                 xhtml.startElement("div");
@@ -344,7 +344,7 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor {
 
     }
 
-    public void processSheet(SheetContentsHandler sheetContentsExtractor, CommentsTable comments,
+    public void processSheet(SheetContentsHandler sheetContentsExtractor, Comments comments,
                              StylesTable styles, ReadOnlySharedStringsTable strings,
                              InputStream sheetInputStream) throws IOException, SAXException {
         try {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xps/XPSTextExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xps/XPSTextExtractor.java
index f49e6de..297290b 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xps/XPSTextExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xps/XPSTextExtractor.java
@@ -18,6 +18,7 @@
 package org.apache.tika.parser.microsoft.ooxml.xps;
 
 
+import java.io.Closeable;
 import java.io.IOException;
 
 import org.apache.poi.ooxml.POIXMLDocument;
@@ -32,13 +33,12 @@ import org.apache.xmlbeans.XmlException;
  * and keep the general framework similar to our other POI-integrated
  * extractors.
  */
-public class XPSTextExtractor extends POIXMLTextExtractor {
+public class XPSTextExtractor implements POIXMLTextExtractor {
 
     private final OPCPackage pkg;
     private final POIXMLProperties properties;
 
     public XPSTextExtractor(OPCPackage pkg) throws OpenXML4JException, XmlException, IOException {
-        super((POIXMLDocument) null);
         this.pkg = pkg;
         this.properties = new POIXMLProperties(pkg);
 
@@ -54,6 +54,21 @@ public class XPSTextExtractor extends POIXMLTextExtractor {
         return null;
     }
 
+    @Override
+    public void setCloseFilesystem(boolean b) {
+
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return false;
+    }
+
+    @Override
+    public Closeable getFilesystem() {
+        return null;
+    }
+
     public POIXMLProperties.CoreProperties getCoreProperties() {
         return this.properties.getCoreProperties();
     }
@@ -65,4 +80,9 @@ public class XPSTextExtractor extends POIXMLTextExtractor {
     public POIXMLProperties.CustomProperties getCustomProperties() {
         return this.properties.getCustomProperties();
     }
+
+    @Override
+    public POIXMLDocument getDocument() {
+        return null;
+    }
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xslf/XSLFEventBasedPowerPointExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xslf/XSLFEventBasedPowerPointExtractor.java
index ff0fd9f..28b9845 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xslf/XSLFEventBasedPowerPointExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xslf/XSLFEventBasedPowerPointExtractor.java
@@ -17,6 +17,7 @@
 
 package org.apache.tika.parser.microsoft.ooxml.xslf;
 
+import java.io.Closeable;
 import java.io.IOException;
 import java.util.Date;
 
@@ -33,7 +34,7 @@ import org.apache.tika.parser.microsoft.ooxml.ParagraphProperties;
 import org.apache.tika.parser.microsoft.ooxml.RunProperties;
 import org.apache.tika.parser.microsoft.ooxml.xwpf.XWPFEventBasedWordExtractor;
 
-public class XSLFEventBasedPowerPointExtractor extends POIXMLTextExtractor {
+public class XSLFEventBasedPowerPointExtractor implements POIXMLTextExtractor {
 
 
     private OPCPackage container;
@@ -46,7 +47,6 @@ public class XSLFEventBasedPowerPointExtractor extends POIXMLTextExtractor {
 
     public XSLFEventBasedPowerPointExtractor(OPCPackage container)
             throws XmlException, OpenXML4JException, IOException {
-        super((POIXMLDocument) null);
         this.container = container;
         this.properties = new POIXMLProperties(container);
     }
@@ -80,6 +80,11 @@ public class XSLFEventBasedPowerPointExtractor extends POIXMLTextExtractor {
         return this.properties.getCustomProperties();
     }
 
+    @Override
+    public POIXMLDocument getDocument() {
+        return null;
+    }
+
 
     @Override
     public String getText() {
@@ -87,6 +92,21 @@ public class XSLFEventBasedPowerPointExtractor extends POIXMLTextExtractor {
         return "";
     }
 
+    @Override
+    public void setCloseFilesystem(boolean b) {
+
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return false;
+    }
+
+    @Override
+    public Closeable getFilesystem() {
+        return null;
+    }
+
 
     private static class XSLFToTextContentHandler
             implements OOXMLWordAndPowerPointTextHandler.XWPFBodyContentsHandler {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java
index 9901eb9..5b87599 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xwpf/XWPFEventBasedWordExtractor.java
@@ -17,6 +17,7 @@
 
 package org.apache.tika.parser.microsoft.ooxml.xwpf;
 
+import java.io.Closeable;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Date;
@@ -58,7 +59,7 @@ import org.apache.tika.parser.microsoft.ooxml.XWPFListManager;
 /**
  * Experimental class that is based on POI's XSSFEventBasedExcelExtractor
  */
-public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
+public class XWPFEventBasedWordExtractor implements POIXMLTextExtractor {
 
     private static final Logger LOG = LoggerFactory.getLogger(XWPFEventBasedWordExtractor.class);
 
@@ -72,7 +73,6 @@ public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
 
     public XWPFEventBasedWordExtractor(OPCPackage container)
             throws XmlException, OpenXML4JException, IOException {
-        super((POIXMLDocument) null);
         this.container = container;
         this.properties = new POIXMLProperties(container);
     }
@@ -106,6 +106,11 @@ public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
         return this.properties.getCustomProperties();
     }
 
+    @Override
+    public POIXMLDocument getDocument() {
+        return null;
+    }
+
 
     @Override
     public String getText() {
@@ -152,6 +157,21 @@ public class XWPFEventBasedWordExtractor extends POIXMLTextExtractor {
         return sb.toString();
     }
 
+    @Override
+    public void setCloseFilesystem(boolean b) {
+
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return false;
+    }
+
+    @Override
+    public Closeable getFilesystem() {
+        return null;
+    }
+
 
     private void handleDocumentPart(PackagePart documentPart, StringBuilder sb)
             throws IOException, SAXException {