You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ki...@apache.org on 2020/08/13 21:08:25 UTC

svn commit: r1880839 [1/3] - in /poi/trunk/src: integrationtest/org/apache/poi/stress/ java/org/apache/poi/extractor/ java/org/apache/poi/hpsf/extractor/ java/org/apache/poi/hssf/extractor/ java/org/apache/poi/sl/extractor/ java/org/apache/poi/ss/extra...

Author: kiwiwings
Date: Thu Aug 13 21:08:24 2020
New Revision: 1880839

URL: http://svn.apache.org/viewvc?rev=1880839&view=rev
Log:
#64411 - Provide JigSaw modules
- rework extractors - see bugzilla entry for more information

Added:
    poi/trunk/src/java/org/apache/poi/extractor/ExtractorFactory.java
      - copied, changed from r1880838, poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java
    poi/trunk/src/java/org/apache/poi/extractor/ExtractorProvider.java   (with props)
    poi/trunk/src/java/org/apache/poi/extractor/MainExtractorFactory.java   (with props)
    poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java
      - copied, changed from r1880838, poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFExtractor.java   (with props)
    poi/trunk/src/resources/main/META-INF/services/org.apache.poi.extractor.ExtractorProvider
    poi/trunk/src/resources/ooxml/META-INF/services/org.apache.poi.extractor.ExtractorProvider
      - copied, changed from r1880689, poi/trunk/src/resources/main/META-INF/services/org.apache.poi.ss.usermodel.WorkbookProvider
    poi/trunk/src/resources/scratchpad/META-INF/
    poi/trunk/src/resources/scratchpad/META-INF/services/
    poi/trunk/src/resources/scratchpad/META-INF/services/org.apache.poi.extractor.ExtractorProvider
Removed:
    poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java
    poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java
Modified:
    poi/trunk/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java
    poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java
    poi/trunk/src/java/org/apache/poi/extractor/POIOLE2TextExtractor.java
    poi/trunk/src/java/org/apache/poi/extractor/POITextExtractor.java
    poi/trunk/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java
    poi/trunk/src/java/org/apache/poi/hssf/extractor/EventBasedExcelExtractor.java
    poi/trunk/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
    poi/trunk/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java
    poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java
    poi/trunk/src/java/org/apache/poi/ss/extractor/ExcelExtractor.java
    poi/trunk/src/multimodule/ooxml/java9/module-info.class
    poi/trunk/src/multimodule/ooxml/java9/module-info.java
    poi/trunk/src/multimodule/ooxml/test9/module-info.class
    poi/trunk/src/multimodule/ooxml/test9/module-info.java
    poi/trunk/src/multimodule/poi/java9/module-info.class
    poi/trunk/src/multimodule/poi/java9/module-info.java
    poi/trunk/src/multimodule/poi/test9/module-info.class
    poi/trunk/src/multimodule/poi/test9/module-info.java
    poi/trunk/src/multimodule/scratchpad/java9/module-info.class
    poi/trunk/src/multimodule/scratchpad/java9/module-info.java
    poi/trunk/src/multimodule/scratchpad/test9/module-info.class
    poi/trunk/src/multimodule/scratchpad/test9/module-info.java
    poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java
    poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java
    poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java
    poi/trunk/src/ooxml/java/org/apache/poi/xdgf/extractor/XDGFVisioExtractor.java
    poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java
    poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
    poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
    poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/tests/TestHxxFEncryption.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java
    poi/trunk/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/TestFixedSizedProperties.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java
    poi/trunk/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java
    poi/trunk/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java

Modified: poi/trunk/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java (original)
+++ poi/trunk/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java Thu Aug 13 21:08:24 2020
@@ -29,11 +29,11 @@ import java.util.HashSet;
 import java.util.Set;
 
 import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.extractor.ExtractorFactory;
 import org.apache.poi.extractor.POIOLE2TextExtractor;
 import org.apache.poi.extractor.POITextExtractor;
 import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
 import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.ss.extractor.ExcelExtractor;
 import org.apache.poi.util.IOUtils;

Modified: poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java (original)
+++ poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java Thu Aug 13 21:08:24 2020
@@ -23,7 +23,7 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.InputStream;
 
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
+import org.apache.poi.extractor.ExtractorFactory;
 import org.apache.poi.sl.extractor.SlideShowExtractor;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xslf.usermodel.XSLFSlideShow;
@@ -37,11 +37,11 @@ public class XSLFFileHandler extends Sli
 		assertNotNull(slideInner.getPresentation());
 		assertNotNull(slideInner.getSlideMasterReferences());
 		assertNotNull(slideInner.getSlideReferences());
-		
+
 		new POIXMLDocumentHandler().handlePOIXMLDocument(slide);
 
 		handleSlideShow(slide);
-		
+
 		slideInner.close();
 		slide.close();
 	}
@@ -49,11 +49,12 @@ public class XSLFFileHandler extends Sli
 	@Override
     public void handleExtracting(File file) throws Exception {
         super.handleExtracting(file);
-        
-        
+
+
         // additionally try the other getText() methods
 
-		try (SlideShowExtractor extractor = ExtractorFactory.createExtractor(file)) {
+		//noinspection rawtypes
+		try (SlideShowExtractor extractor = (SlideShowExtractor) ExtractorFactory.createExtractor(file)) {
 			assertNotNull(extractor);
 			extractor.setSlidesByDefault(true);
 			extractor.setNotesByDefault(true);

Copied: poi/trunk/src/java/org/apache/poi/extractor/ExtractorFactory.java (from r1880838, poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java)
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/extractor/ExtractorFactory.java?p2=poi/trunk/src/java/org/apache/poi/extractor/ExtractorFactory.java&p1=poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java&r1=1880838&r2=1880839&rev=1880839&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java (original)
+++ poi/trunk/src/java/org/apache/poi/extractor/ExtractorFactory.java Thu Aug 13 21:08:24 2020
@@ -16,30 +16,33 @@
 ==================================================================== */
 package org.apache.poi.extractor;
 
-import static org.apache.poi.hssf.model.InternalWorkbook.OLD_WORKBOOK_DIR_ENTRY_NAME;
-import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
+import static org.apache.poi.hssf.record.crypto.Biff8EncryptionKey.getCurrentUserPassword;
+import static org.apache.poi.poifs.crypt.EncryptionInfo.ENCRYPTION_INFO_ENTRY;
 
+import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.lang.reflect.Method;
 import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.List;
+import java.util.ServiceLoader;
+import java.util.stream.StreamSupport;
 
-import org.apache.poi.hssf.OldExcelFormatException;
-import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
+import org.apache.poi.EmptyFileException;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.poifs.crypt.Decryptor;
 import org.apache.poi.poifs.filesystem.DirectoryEntry;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.FileMagic;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.IOUtils;
 import org.apache.poi.util.POILogFactory;
 import org.apache.poi.util.POILogger;
 
 /**
  * Figures out the correct POIOLE2TextExtractor for your supplied
  *  document, and returns it.
- *  
+ *
  * <p>Note 1 - will fail for many file formats if the POI Scratchpad jar is
  *  not present on the runtime classpath</p>
  * <p>Note 2 - for text extractor creation across all formats, use
@@ -49,16 +52,29 @@ import org.apache.poi.util.POILogger;
  *  off switching to <a href="http://tika.apache.org">Apache Tika</a> instead!</p>
  */
 @SuppressWarnings({"WeakerAccess", "JavadocReference"})
-public final class OLE2ExtractorFactory {
-    private static final POILogger LOGGER = POILogFactory.getLogger(OLE2ExtractorFactory.class); 
-    
+public final class ExtractorFactory {
+    private static final POILogger LOGGER = POILogFactory.getLogger(ExtractorFactory.class);
+
     /** Should this thread prefer event based over usermodel based extractors? */
     private static final ThreadLocal<Boolean> threadPreferEventExtractors = ThreadLocal.withInitial(() -> Boolean.FALSE);
 
     /** Should all threads prefer event based over usermodel based extractors? */
     private static Boolean allPreferEventExtractors;
 
-    private OLE2ExtractorFactory() {
+
+    private static class Singleton {
+        private static final ExtractorFactory INSTANCE = new ExtractorFactory();
+    }
+
+    private interface ProviderMethod {
+        POITextExtractor create(ExtractorProvider prov) throws IOException;
+    }
+
+    private final List<ExtractorProvider> provider = new ArrayList<>();
+
+
+    private ExtractorFactory() {
+        ServiceLoader.load(ExtractorProvider.class).forEach(provider::add);
     }
 
     /**
@@ -110,63 +126,80 @@ public final class OLE2ExtractorFactory
      * @return If the current thread should use event based extractors.
      */
     public static boolean getPreferEventExtractor() {
-        if(allPreferEventExtractors != null) {
-            return allPreferEventExtractors;
-        }
-        return threadPreferEventExtractors.get();
+        return (allPreferEventExtractors != null) ? allPreferEventExtractors : threadPreferEventExtractors.get();
     }
 
-    @SuppressWarnings("unchecked")
-    public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException {
-        return (T)createExtractor(fs.getRoot());
+    public static POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
+        return createExtractor(fs, getCurrentUserPassword());
     }
 
-    @SuppressWarnings("unchecked")
-    public static <T extends POITextExtractor> T createExtractor(InputStream input) throws IOException {
-        Class<?> cls = getOOXMLClass();
-        if (cls != null) {
-            // Use Reflection to get us the full OOXML-enabled version
-            try {
-                Method m = cls.getDeclaredMethod("createExtractor", InputStream.class);
-                return (T)m.invoke(null, input);
-            } catch (IllegalArgumentException iae) {
-                throw iae;
-            } catch (Exception e) {
-                throw new IllegalArgumentException("Error creating Extractor for InputStream", e);
-            }
-        } else {
-            // Best hope it's OLE2....
-            return createExtractor(new POIFSFileSystem(input));
-        }
+    public static POITextExtractor createExtractor(POIFSFileSystem fs, String password) throws IOException {
+        return createExtractor(fs.getRoot(), password);
     }
 
-    private static Class<?> getOOXMLClass() {
-        try {
-            return OLE2ExtractorFactory.class.getClassLoader().loadClass(
-                    "org.apache.poi.extractor.ExtractorFactory"
-            );
-        } catch (ClassNotFoundException e) {
-            LOGGER.log(POILogger.WARN, "POI OOXML jar missing");
-            return null;
+    public static POITextExtractor createExtractor(InputStream input) throws IOException {
+        return createExtractor(input, getCurrentUserPassword());
+    }
+
+    public static POITextExtractor createExtractor(InputStream input, String password) throws IOException {
+        final InputStream is = FileMagic.prepareToCheckMagic(input);
+        byte[] emptyFileCheck = new byte[1];
+        is.mark(emptyFileCheck.length);
+        if (is.read(emptyFileCheck) < emptyFileCheck.length) {
+            throw new EmptyFileException();
+        }
+        is.reset();
+
+        final FileMagic fm = FileMagic.valueOf(is);
+        if (FileMagic.OOXML == fm) {
+            return wp(fm, w -> w.create(is, password));
+        }
+
+        if (FileMagic.OLE2 != fm) {
+            throw new IOException("Can't create extractor - unsupported file type: "+fm);
         }
+
+        POIFSFileSystem poifs = new POIFSFileSystem(is);
+        boolean isOOXML = poifs.getRoot().hasEntry(ENCRYPTION_INFO_ENTRY);
+
+        return wp(isOOXML ? FileMagic.OOXML : fm, w -> w.create(poifs.getRoot(), password));
+    }
+
+    public static POITextExtractor createExtractor(File file) throws IOException {
+        return createExtractor(file, getCurrentUserPassword());
     }
-    private static Class<?> getScratchpadClass() {
+
+    public static POITextExtractor createExtractor(File file, String password) throws IOException {
+        if (file.length() == 0) {
+            throw new EmptyFileException();
+        }
+
+        final FileMagic fm = FileMagic.valueOf(file);
+        if (FileMagic.OOXML == fm) {
+            return wp(fm, w -> w.create(file, password));
+        }
+
+        if (FileMagic.OLE2 != fm) {
+            throw new IOException("Can't create extractor - unsupported file type: "+fm);
+        }
+
+        POIFSFileSystem poifs = new POIFSFileSystem(file, true);
         try {
-            return OLE2ExtractorFactory.class.getClassLoader().loadClass(
-                    "org.apache.poi.extractor.ole2.OLE2ScratchpadExtractorFactory"
-            );
-        } catch (ClassNotFoundException e) {
-            LOGGER.log(POILogger.ERROR, "POI Scratchpad jar missing");
-            throw new IllegalStateException("POI Scratchpad jar missing, required for ExtractorFactory");
+            boolean isOOXML = poifs.getRoot().hasEntry(ENCRYPTION_INFO_ENTRY);
+            return wp(isOOXML ? FileMagic.OOXML : fm, w -> w.create(poifs.getRoot(), password));
+        } catch (IOException | RuntimeException e) {
+            IOUtils.closeQuietly(poifs);
+            throw e;
         }
     }
-    
+
+
     /**
      * Create the Extractor, if possible. Generally needs the Scratchpad jar.
      * Note that this won't check for embedded OOXML resources either, use
      *  {@link org.apache.poi.ooxml.extractor.ExtractorFactory} for that.
      *
-     * @param poifsDir The {@link DirectoryNode} pointing to a document.
+     * @param root The {@link DirectoryNode} pointing to a document.
      *
      * @return The resulting {@link POITextExtractor}, an exception is thrown if
      *      no TextExtractor can be created for some reason.
@@ -176,54 +209,40 @@ public final class OLE2ExtractorFactory
      *      an unsupported version of Excel.
      * @throws IllegalArgumentException If creating the Extractor fails
      */
-    public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException {
-        // Look for certain entries in the stream, to figure it
-        // out from
-        for (String workbookName : WORKBOOK_DIR_ENTRY_NAMES) {
-            if (poifsDir.hasEntry(workbookName)) {
-                if (getPreferEventExtractor()) {
-                    return new EventBasedExcelExtractor(poifsDir);
-                }
-                return new ExcelExtractor(poifsDir);
-            }
-        }
-        if (poifsDir.hasEntry(OLD_WORKBOOK_DIR_ENTRY_NAME)) {
-            throw new OldExcelFormatException("Old Excel Spreadsheet format (1-95) "
-                    + "found. Please call OldExcelExtractor directly for basic text extraction");
-        }
-        
-        // Ask Scratchpad, or fail trying
-        Class<?> cls = getScratchpadClass();
-        try {
-            Method m = cls.getDeclaredMethod("createExtractor", DirectoryNode.class);
-            POITextExtractor ext = (POITextExtractor)m.invoke(null, poifsDir);
-            if (ext != null) return ext;
-        } catch (IllegalArgumentException iae) {
-            throw iae;
-        } catch (Exception e) {
-            throw new IllegalArgumentException("Error creating Scratchpad Extractor", e);
-        }
+    public static POITextExtractor createExtractor(DirectoryNode root) throws IOException {
+        return createExtractor(root, getCurrentUserPassword());
+    }
 
-        throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
+    public static POITextExtractor createExtractor(final DirectoryNode root, String password) throws IOException {
+        // Encrypted OOXML files go inside OLE2 containers, is this one?
+        if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY) || root.hasEntry("Package")) {
+            return wp(FileMagic.OOXML, w -> w.create(root, password));
+        } else {
+            return wp(FileMagic.OLE2, w ->  w.create(root, password));
+        }
     }
 
-    /**
-     * Returns an array of text extractors, one for each of
-     *  the embedded documents in the file (if there are any).
-     * If there are no embedded documents, you'll get back an
-     *  empty array. Otherwise, you'll get one open
-     *  {@link POITextExtractor} for each embedded file.
-     *
-     * @param ext The extractor to look at for embedded documents
-     *
-     * @return An array of resulting extractors. Empty if no embedded documents are found.
-     *
-     * @throws IOException If converting the {@link DirectoryNode} into a HSSFWorkbook fails
-     * @throws OldFileFormatException If the {@link DirectoryNode} points to a format of
-     *      an unsupported version of Excel.
-     * @throws IllegalArgumentException If creating the Extractor fails
-     */
-    public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException {
+        /**
+         * Returns an array of text extractors, one for each of
+         *  the embedded documents in the file (if there are any).
+         * If there are no embedded documents, you'll get back an
+         *  empty array. Otherwise, you'll get one open
+         *  {@link POITextExtractor} for each embedded file.
+         *
+         * @param ext The extractor to look at for embedded documents
+         *
+         * @return An array of resulting extractors. Empty if no embedded documents are found.
+         *
+         * @throws IOException If converting the {@link DirectoryNode} into a HSSFWorkbook fails
+         * @throws OldFileFormatException If the {@link DirectoryNode} points to a format of
+         *      an unsupported version of Excel.
+         * @throws IllegalArgumentException If creating the Extractor fails
+         */
+    public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException {
+        if (ext == null) {
+            throw new IllegalStateException("extractor must be given");
+        }
+
         // All the embedded directories we spotted
         List<Entry> dirs = new ArrayList<>();
         // For anything else not directly held in as a POIFS directory
@@ -237,22 +256,15 @@ public final class OLE2ExtractorFactory
 
         if(ext instanceof ExcelExtractor) {
             // These are in MBD... under the root
-            Iterator<Entry> it = root.getEntries();
-            while(it.hasNext()) {
-                Entry entry = it.next();
-                if(entry.getName().startsWith("MBD")) {
-                    dirs.add(entry);
-                }
-            }
+            StreamSupport.stream(root.spliterator(), false)
+                .filter(entry -> entry.getName().startsWith("MBD"))
+                .forEach(dirs::add);
         } else {
-            // Ask Scratchpad, or fail trying
-            Class<?> cls = getScratchpadClass();
-            try {
-                Method m = cls.getDeclaredMethod(
-                        "identifyEmbeddedResources", POIOLE2TextExtractor.class, List.class, List.class);
-                m.invoke(null, ext, dirs, nonPOIFS);
-            } catch (Exception e) {
-                throw new IllegalArgumentException("Error checking for Scratchpad embedded resources", e);
+            for (ExtractorProvider prov : Singleton.INSTANCE.provider) {
+                if (prov.accepts(FileMagic.OLE2)) {
+                    prov.identifyEmbeddedResources(ext, dirs, nonPOIFS);
+                    break;
+                }
             }
         }
 
@@ -261,19 +273,32 @@ public final class OLE2ExtractorFactory
             return new POITextExtractor[0];
         }
 
-        ArrayList<POITextExtractor> e = new ArrayList<>();
+        ArrayList<POITextExtractor> textExtractors = new ArrayList<>();
         for (Entry dir : dirs) {
-            e.add(createExtractor((DirectoryNode) dir
-            ));
+            textExtractors.add(createExtractor((DirectoryNode) dir));
         }
         for (InputStream stream : nonPOIFS) {
             try {
-                e.add(createExtractor(stream));
-            } catch (Exception xe) {
-                // Ignore, invalid format
-                LOGGER.log(POILogger.WARN, xe);
+                textExtractors.add(createExtractor(stream));
+            } catch (IOException e) {
+                // Ignore, just means it didn't contain a format we support as yet
+                LOGGER.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage());
             }
         }
-        return e.toArray(new POITextExtractor[0]);
+        return textExtractors.toArray(new POITextExtractor[0]);
     }
+
+    private static POITextExtractor wp(FileMagic fm, ProviderMethod fun) throws IOException {
+        for (ExtractorProvider prov : Singleton.INSTANCE.provider) {
+            if (prov.accepts(fm)) {
+                POITextExtractor ext = fun.create(prov);
+                if (ext != null) {
+                    return ext;
+                }
+            }
+        }
+        throw new IOException("Your InputStream was neither an OLE2 stream, nor an OOXML stream " +
+            "or you haven't provide the poi-ooxml*.jar and/or poi-scratchpad*.jar in the classpath/modulepath - FileMagic: "+fm);
+    }
+
 }

Added: poi/trunk/src/java/org/apache/poi/extractor/ExtractorProvider.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/extractor/ExtractorProvider.java?rev=1880839&view=auto
==============================================================================
--- poi/trunk/src/java/org/apache/poi/extractor/ExtractorProvider.java (added)
+++ poi/trunk/src/java/org/apache/poi/extractor/ExtractorProvider.java Thu Aug 13 21:08:24 2020
@@ -0,0 +1,76 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.extractor;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.FileMagic;
+
+public interface ExtractorProvider {
+    boolean accepts(FileMagic fm);
+
+    /**
+     * Create Extractor via file
+     * @param file the file
+     * @param password the password or {@code null} if not encrypted
+     * @return the extractor
+     * @throws IOException if file can't be read or parsed
+     */
+    POITextExtractor create(File file, String password) throws IOException;
+
+    /**
+     * Create Extractor via InputStream
+     * @param inputStream the stream
+     * @param password the password or {@code null} if not encrypted
+     * @return the extractor
+     * @throws IOException if stream can't be read or parsed
+     */
+    POITextExtractor create(InputStream inputStream, String password) throws IOException;
+
+    /**
+     * Create Extractor from POIFS node
+     * @param poifsDir the node
+     * @param password the password or {@code null} if not encrypted
+     * @return the extractor
+     * @throws IOException if node can't be parsed
+     */
+    POITextExtractor create(DirectoryNode poifsDir, String password) throws IOException;
+
+    /**
+     * Identifies the embedded documents (if there are any) of the
+     *  document held by the given extractor and collects them in the
+     *  supplied lists instead of returning them.
+     * This default implementation doesn't support embedded resources
+     *  and always throws an {@link IllegalArgumentException}.
+     *
+     * @param ext the extractor holding the directory to start parsing
+     * @param dirs a list to be filled with directory references holding embedded documents
+     * @param nonPOIFS a list to be filled with streams which aren't based on POIFS entries
+     *
+     * @throws IOException when the format specific extraction fails because of invalid entries
+     */
+    default void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
+        throw new IllegalArgumentException("Error checking for Scratchpad embedded resources");
+    }
+
+}

Propchange: poi/trunk/src/java/org/apache/poi/extractor/ExtractorProvider.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: poi/trunk/src/java/org/apache/poi/extractor/MainExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/extractor/MainExtractorFactory.java?rev=1880839&view=auto
==============================================================================
--- poi/trunk/src/java/org/apache/poi/extractor/MainExtractorFactory.java (added)
+++ poi/trunk/src/java/org/apache/poi/extractor/MainExtractorFactory.java Thu Aug 13 21:08:24 2020
@@ -0,0 +1,76 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.extractor;
+
+import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hssf.extractor.OldExcelExtractor;
+import org.apache.poi.hssf.model.InternalWorkbook;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * ExtractorFactory for HSSF and Old Excel format
+ */
+public class MainExtractorFactory implements ExtractorProvider {
+    @Override
+    public boolean accepts(FileMagic fm) {
+        return FileMagic.OLE2 == fm;
+    }
+
+    @Override
+    public POITextExtractor create(File file, String password) throws IOException {
+        return create(new POIFSFileSystem(file, true).getRoot(), password);
+    }
+
+    @Override
+    public POITextExtractor create(InputStream inputStream, String password) throws IOException {
+        return create(new POIFSFileSystem(inputStream).getRoot(), password);
+    }
+
+    @Override
+    public POITextExtractor create(DirectoryNode poifsDir, String password) throws IOException {
+        final String oldPW = Biff8EncryptionKey.getCurrentUserPassword();
+        try {
+            Biff8EncryptionKey.setCurrentUserPassword(password);
+
+            // Look for the well-known workbook entries in the directory to identify the format
+            for (String workbookName : WORKBOOK_DIR_ENTRY_NAMES) {
+                if (poifsDir.hasEntry(workbookName)) {
+                    return ExtractorFactory.getPreferEventExtractor() ? new EventBasedExcelExtractor(poifsDir) : new ExcelExtractor(poifsDir);
+                }
+            }
+
+            if (poifsDir.hasEntry(InternalWorkbook.OLD_WORKBOOK_DIR_ENTRY_NAME)) {
+                return new OldExcelExtractor(poifsDir);
+            }
+        } finally {
+            Biff8EncryptionKey.setCurrentUserPassword(oldPW);
+        }
+
+        return null;
+    }
+}

Propchange: poi/trunk/src/java/org/apache/poi/extractor/MainExtractorFactory.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: poi/trunk/src/java/org/apache/poi/extractor/POIOLE2TextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/extractor/POIOLE2TextExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/extractor/POIOLE2TextExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/extractor/POIOLE2TextExtractor.java Thu Aug 13 21:08:24 2020
@@ -30,55 +30,28 @@ import org.apache.poi.poifs.filesystem.D
  *  org.apache.poi.[format].extractor .
  *
  * @see org.apache.poi.hssf.extractor.ExcelExtractor
- * @see org.apache.poi.hslf.extractor.PowerPointExtractor
  * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
  * @see org.apache.poi.hwpf.extractor.WordExtractor
  */
-public abstract class POIOLE2TextExtractor extends POITextExtractor {
-	/** The POIDocument that's open */
-	protected POIDocument document;
-
-	/**
-	 * Creates a new text extractor for the given document
-	 *
-	 * @param document The POIDocument to use in this extractor.
-	 */
-	public POIOLE2TextExtractor(POIDocument document) {
-		this.document = document;
-
-		// Ensure any underlying resources, such as open files,
-		//  will get cleaned up if the user calls #close()
-		setFilesystem(document);
-	}
-
-	/**
-	 * Creates a new text extractor, using the same
-	 *  document as another text extractor. Normally
-	 *  only used by properties extractors.
-	 *
-	 * @param otherExtractor the extractor which document to be used
-	 */
-	protected POIOLE2TextExtractor(POIOLE2TextExtractor otherExtractor) {
-		this.document = otherExtractor.document;
-	}
-
+public interface POIOLE2TextExtractor extends POITextExtractor {
 	/**
 	 * Returns the document information metadata for the document
 	 *
      * @return The Document Summary Information or null
      *      if it could not be read for this document.
 	 */
-	public DocumentSummaryInformation getDocSummaryInformation() {
-		return document.getDocumentSummaryInformation();
+	default DocumentSummaryInformation getDocSummaryInformation() {
+		return getDocument().getDocumentSummaryInformation();
 	}
+
 	/**
 	 * Returns the summary information metadata for the document.
 	 *
      * @return The Summary information for the document or null
      *      if it could not be read for this document.
 	 */
-	public SummaryInformation getSummaryInformation() {
-		return document.getSummaryInformation();
+	default SummaryInformation getSummaryInformation() {
+		return getDocument().getSummaryInformation();
 	}
 
 	/**
@@ -88,7 +61,7 @@ public abstract class POIOLE2TextExtract
 	 * @return an instance of POIExtractor that can extract meta-data.
 	 */
 	@Override
-    public POITextExtractor getMetadataTextExtractor() {
+    default POITextExtractor getMetadataTextExtractor() {
 		return new HPSFPropertiesExtractor(this);
 	}
 
@@ -97,8 +70,8 @@ public abstract class POIOLE2TextExtract
 	 *
 	 * @return the DirectoryEntry that is associated with the POIDocument of this extractor.
 	 */
-    public DirectoryEntry getRoot() {
-        return document.getDirectory();
+    default DirectoryEntry getRoot() {
+        return getDocument().getDirectory();
     }
 
     /**
@@ -107,7 +80,5 @@ public abstract class POIOLE2TextExtract
      * @return the underlying POIDocument
      */
     @Override
-    public POIDocument getDocument() {
-        return document;
-    }
+    POIDocument getDocument();
 }
\ No newline at end of file

Modified: poi/trunk/src/java/org/apache/poi/extractor/POITextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/extractor/POITextExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/extractor/POITextExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/extractor/POITextExtractor.java Thu Aug 13 21:08:24 2020
@@ -21,19 +21,16 @@ import java.io.IOException;
 
 /**
  * Common Parent for Text Extractors
- *  of POI Documents. 
+ *  of POI Documents.
  * You will typically find the implementation of
  *  a given format's text extractor under
  *  org.apache.poi.[format].extractor .
- *  
+ *
  * @see org.apache.poi.hssf.extractor.ExcelExtractor
- * @see org.apache.poi.hslf.extractor.PowerPointExtractor
  * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
  * @see org.apache.poi.hwpf.extractor.WordExtractor
  */
-public abstract class POITextExtractor implements Closeable {
-    private Closeable fsToClose;
-    
+public interface POITextExtractor extends Closeable {
 	/**
 	 * Retrieves all the text from the document.
 	 * How cells, paragraphs etc are separated in the text
@@ -41,42 +38,50 @@ public abstract class POITextExtractor i
 	 *  a specific project for details.
 	 * @return All the text from the document
 	 */
-	public abstract String getText();
-	
+	String getText();
+
 	/**
 	 * Returns another text extractor, which is able to
 	 *  output the textual content of the document
 	 *  metadata / properties, such as author and title.
-	 * 
+	 *
 	 * @return the metadata and text extractor
 	 */
-	public abstract POITextExtractor getMetadataTextExtractor();
+	POITextExtractor getMetadataTextExtractor();
 
 	/**
-	 * Used to ensure file handle cleanup.
-	 * 
-	 * @param fs filesystem to close
+	 * @param doCloseFilesystem {@code true} (default), if underlying resources/filesystem should be
+	 *        closed on {@link #close()}
 	 */
-	public void setFilesystem(Closeable fs) {
-	    fsToClose = fs;
-	}
-	
+	void setCloseFilesystem(boolean doCloseFilesystem);
+
+	/**
+	 * @return {@code true}, if resources/filesystem should be closed on {@link #close()}
+	 */
+	boolean isCloseFilesystem();
+
+	/**
+	 * @return The underlying resources/filesystem
+	 */
+	Closeable getFilesystem();
+
 	/**
 	 * Allows to free resources of the Extractor as soon as
 	 * it is not needed any more. This may include closing
 	 * open file handles and freeing memory.
-	 * 
+	 *
 	 * The Extractor cannot be used after close has been called.
 	 */
 	@Override
-    public void close() throws IOException {
-		if(fsToClose != null) {
-		    fsToClose.close();
+    default void close() throws IOException {
+		Closeable fs = getFilesystem();
+		if (isCloseFilesystem() && fs != null) {
+			fs.close();
 		}
 	}
 
 	/**
 	 * @return the processed document
 	 */
-	public abstract Object getDocument();
+	Object getDocument();
 }

Modified: poi/trunk/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java Thu Aug 13 21:08:24 2020
@@ -17,9 +17,6 @@
 
 package org.apache.poi.hpsf.extractor;
 
-import java.io.File;
-import java.io.IOException;
-
 import org.apache.poi.POIDocument;
 import org.apache.poi.extractor.POIOLE2TextExtractor;
 import org.apache.poi.extractor.POITextExtractor;
@@ -37,15 +34,20 @@ import org.apache.poi.poifs.filesystem.P
  *  build in and custom, returning them in
  *  textual form.
  */
-public class HPSFPropertiesExtractor extends POIOLE2TextExtractor {
+public class HPSFPropertiesExtractor implements POIOLE2TextExtractor {
+    private final POIDocument document;
+    private boolean doCloseFilesystem = true;
+
     public HPSFPropertiesExtractor(POIOLE2TextExtractor mainExtractor) {
-        super(mainExtractor);
+        document = mainExtractor.getDocument();
     }
-    public HPSFPropertiesExtractor(POIDocument doc) {
-        super(doc);
+
+    public HPSFPropertiesExtractor(POIDocument document) {
+        this.document = document;
     }
+
     public HPSFPropertiesExtractor(POIFSFileSystem fs) {
-        super(new HPSFPropertiesOnlyDocument(fs));
+        document = new HPSFPropertiesOnlyDocument(fs);
     }
 
     public String getDocumentSummaryInformationText() {
@@ -122,11 +124,11 @@ public class HPSFPropertiesExtractor ext
     }
 
     private static String getPropertyValueText(Object val) {
-        return (val == null) 
+        return (val == null)
             ? "(not set)"
             : PropertySet.getPropertyStringValue(val);
     }
-    
+
     @Override
     public boolean equals(Object o) {
         return super.equals(o);
@@ -137,12 +139,23 @@ public class HPSFPropertiesExtractor ext
         return super.hashCode();
     }
 
-    public static void main(String[] args) throws IOException {
-        for (String file : args) {
-            try (HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(
-                    new POIFSFileSystem(new File(file)))) {
-                System.out.println(ext.getText());
-            }
-        }
+    @Override
+    public POIDocument getDocument() {
+        return document;
+    }
+
+    @Override
+    public void setCloseFilesystem(boolean doCloseFilesystem) {
+        this.doCloseFilesystem = doCloseFilesystem;
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return doCloseFilesystem;
+    }
+
+    @Override
+    public POIDocument getFilesystem() {
+        return document;
     }
 }

Modified: poi/trunk/src/java/org/apache/poi/hssf/extractor/EventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/hssf/extractor/EventBasedExcelExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hssf/extractor/EventBasedExcelExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/hssf/extractor/EventBasedExcelExtractor.java Thu Aug 13 21:08:24 2020
@@ -17,6 +17,7 @@
 
 package org.apache.poi.hssf.extractor;
 
+import java.io.Closeable;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -37,9 +38,9 @@ import org.apache.poi.hssf.record.LabelR
 import org.apache.poi.hssf.record.LabelSSTRecord;
 import org.apache.poi.hssf.record.NoteRecord;
 import org.apache.poi.hssf.record.NumberRecord;
-import org.apache.poi.hssf.record.Record;
 import org.apache.poi.hssf.record.SSTRecord;
 import org.apache.poi.hssf.record.StringRecord;
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 
@@ -56,29 +57,31 @@ import org.apache.poi.poifs.filesystem.P
  * To turn an excel file into a CSV or similar, then see
  *  the XLS2CSVmra example
  * </p>
- * 
+ *
  * @see <a href="http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java">XLS2CSVmra</a>
  */
-public class EventBasedExcelExtractor extends POIOLE2TextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor {
-    private DirectoryNode _dir;
+public class EventBasedExcelExtractor implements POIOLE2TextExtractor, org.apache.poi.ss.extractor.ExcelExtractor {
+    private final POIFSFileSystem poifs;
+    private final DirectoryNode _dir;
+    private boolean doCloseFilesystem = true;
     boolean _includeSheetNames = true;
     boolean _formulasNotResults;
 
-    public EventBasedExcelExtractor( DirectoryNode dir )
-    {
-        super( (POIDocument)null );
+    public EventBasedExcelExtractor(DirectoryNode dir) {
+        poifs = null;
         _dir = dir;
     }
 
    public EventBasedExcelExtractor(POIFSFileSystem fs) {
-      this(fs.getRoot());
-      super.setFilesystem(fs);
+        poifs = fs;
+        _dir = fs.getRoot();
    }
 
    /**
     * Would return the document information metadata for the document,
     *  if we supported it
     */
+   @Override
    public DocumentSummaryInformation getDocSummaryInformation() {
        throw new IllegalStateException("Metadata extraction not supported in streaming mode, please use ExcelExtractor");
    }
@@ -86,6 +89,7 @@ public class EventBasedExcelExtractor ex
     * Would return the summary information metadata for the document,
     *  if we supported it
     */
+   @Override
    public SummaryInformation getSummaryInformation() {
        throw new IllegalStateException("Metadata extraction not supported in streaming mode, please use ExcelExtractor");
    }
@@ -262,4 +266,29 @@ public class EventBasedExcelExtractor ex
            }
        }
    }
+
+    @Override
+    public void setCloseFilesystem(boolean doCloseFilesystem) {
+        this.doCloseFilesystem = doCloseFilesystem;
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return doCloseFilesystem;
+    }
+
+    @Override
+    public Closeable getFilesystem() {
+        return poifs;
+    }
+
+    @Override
+    public POIDocument getDocument() {
+        return null;
+    }
+
+    @Override
+    public DirectoryEntry getRoot() {
+        return _dir;
+    }
 }

Modified: poi/trunk/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java Thu Aug 13 21:08:24 2020
@@ -50,12 +50,13 @@ import org.apache.poi.ss.usermodel.Row.M
  * To turn an excel file into a CSV or similar, then see
  *  the XLS2CSVmra example
  * </p>
- * 
+ *
  * @see <a href="http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java">XLS2CSVmra</a>
  */
-public class ExcelExtractor extends POIOLE2TextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor {
+public class ExcelExtractor implements POIOLE2TextExtractor, org.apache.poi.ss.extractor.ExcelExtractor {
 	private final HSSFWorkbook _wb;
 	private final HSSFDataFormatter _formatter;
+	private boolean doCloseFilesystem = true;
 	private boolean _includeSheetNames = true;
 	private boolean _shouldEvaluateFormulas = true;
 	private boolean _includeCellComments;
@@ -63,13 +64,14 @@ public class ExcelExtractor extends POIO
 	private boolean _includeHeadersFooters = true;
 
 	public ExcelExtractor(HSSFWorkbook wb) {
-		super(wb);
 		_wb = wb;
 		_formatter = new HSSFDataFormatter();
 	}
+
 	public ExcelExtractor(POIFSFileSystem fs) throws IOException {
 		this(fs.getRoot());
 	}
+
 	public ExcelExtractor(DirectoryNode dir) throws IOException {
 		this(new HSSFWorkbook(dir, true));
 	}
@@ -201,9 +203,9 @@ public class ExcelExtractor extends POIO
 
 	/**
 	 * Command line extractor.
-	 * 
+	 *
 	 * @param args the command line parameters
-	 * 
+	 *
 	 * @throws IOException if the file can't be read or contains errors
 	 */
 	public static void main(String[] args) throws IOException {
@@ -225,7 +227,7 @@ public class ExcelExtractor extends POIO
 
 		try (InputStream is = cmdArgs.getInputFile() == null ? System.in : new FileInputStream(cmdArgs.getInputFile());
 			 HSSFWorkbook wb = new HSSFWorkbook(is);
-			 ExcelExtractor extractor = new ExcelExtractor(wb);
+			 ExcelExtractor extractor = new ExcelExtractor(wb)
 		) {
 			extractor.setIncludeSheetNames(cmdArgs.shouldShowSheetNames());
 			extractor.setFormulasNotResults(!cmdArgs.shouldEvaluateFormulas());
@@ -255,7 +257,7 @@ public class ExcelExtractor extends POIO
 	 * Should blank cells be output? Default is to only
 	 *  output cells that are present in the file and are
 	 *  non-blank.
-	 * 
+	 *
 	 * @param includeBlankCells {@code true} if blank cells should be included
 	 */
 	public void setIncludeBlankCells(boolean includeBlankCells) {
@@ -411,4 +413,24 @@ public class ExcelExtractor extends POIO
 
 		return text.toString();
 	}
+
+	@Override
+	public HSSFWorkbook getDocument() {
+		return _wb;
+	}
+
+	@Override
+	public void setCloseFilesystem(boolean doCloseFilesystem) {
+		this.doCloseFilesystem = doCloseFilesystem;
+	}
+
+	@Override
+	public boolean isCloseFilesystem() {
+		return doCloseFilesystem;
+	}
+
+	@Override
+	public HSSFWorkbook getFilesystem() {
+		return _wb;
+	}
 }

Modified: poi/trunk/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/hssf/extractor/OldExcelExtractor.java Thu Aug 13 21:08:24 2020
@@ -29,6 +29,7 @@ import java.io.IOException;
 import java.io.InputStream;
 
 import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.extractor.POITextExtractor;
 import org.apache.poi.hssf.OldExcelFormatException;
 import org.apache.poi.hssf.record.BOFRecord;
 import org.apache.poi.hssf.record.CodepageRecord;
@@ -58,7 +59,7 @@ import org.apache.poi.util.IOUtils;
  *  by Apache Tika, but not really intended for display to the user.
  * </p>
  */
-public class OldExcelExtractor implements Closeable {
+public class OldExcelExtractor implements POITextExtractor {
 
     private final static int FILE_PASS_RECORD_SID = 0x2f;
     //arbitrarily selected; may need to increase
@@ -295,24 +296,39 @@ public class OldExcelExtractor implement
             }
         }
 
-        close();
         ris = null;
 
         return text.toString();
     }
 
-    @Override
-    public void close() {
-        // some cases require this close here
-        if(toClose != null) {
-            IOUtils.closeQuietly(toClose);
-            toClose = null;
-        }
-    }
-
     protected void handleNumericCell(StringBuilder text, double value) {
         // TODO Need to fetch / use format strings
         text.append(value);
         text.append('\n');
     }
+
+    @Override
+    public POITextExtractor getMetadataTextExtractor() {
+        return null;
+    }
+
+    @Override
+    public void setCloseFilesystem(boolean doCloseFilesystem) {
+
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return toClose != null;
+    }
+
+    @Override
+    public Closeable getFilesystem() {
+        return toClose;
+    }
+
+    @Override
+    public Object getDocument() {
+        return ris;
+    }
 }

Modified: poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java Thu Aug 13 21:08:24 2020
@@ -54,14 +54,14 @@ import org.apache.poi.util.POILogger;
 public class SlideShowExtractor<
     S extends Shape<S,P>,
     P extends TextParagraph<S,P,? extends TextRun>
-> extends POITextExtractor {
+> implements POITextExtractor {
     private static final POILogger LOG = POILogFactory.getLogger(SlideShowExtractor.class);
 
     // placeholder text for slide numbers
     private static final String SLIDE_NUMBER_PH = "‹#›";
 
 
-    private SlideShow<S,P> slideshow;
+    protected final SlideShow<S,P> slideshow;
 
     private boolean slidesByDefault = true;
     private boolean notesByDefault;
@@ -69,9 +69,9 @@ public class SlideShowExtractor<
     private boolean masterByDefault;
 
     private Predicate<Object> filter = o -> true;
+    private boolean doCloseFilesystem = true;
 
     public SlideShowExtractor(final SlideShow<S,P> slideshow) {
-        setFilesystem(slideshow);
         this.slideshow = slideshow;
     }
 
@@ -81,8 +81,8 @@ public class SlideShowExtractor<
      * @return the opened document
      */
     @Override
-    public final Object getDocument() {
-        return slideshow.getPersistDocument();
+    public SlideShow<S,P> getDocument() {
+        return slideshow;
     }
 
     /**
@@ -339,17 +339,17 @@ public class SlideShowExtractor<
             return raw;
         }
 
-        TextParagraph tp = tr.getParagraph();
-        TextShape ps = (tp != null) ? tp.getParentShape() : null;
-        Sheet sh = (ps != null) ? ps.getSheet() : null;
-        String slideNr = (sh instanceof Slide) ? Integer.toString(((Slide)sh).getSlideNumber() + 1) : "";
+        TextParagraph<?,?,?> tp = tr.getParagraph();
+        TextShape<?,?> ps = (tp != null) ? tp.getParentShape() : null;
+        Sheet<?,?> sh = (ps != null) ? ps.getSheet() : null;
+        String slideNr = (sh instanceof Slide) ? Integer.toString(((Slide<?,?>)sh).getSlideNumber() + 1) : "";
 
         return raw.replace(SLIDE_NUMBER_PH, slideNr);
     }
 
     private static String replaceTextCap(TextRun tr) {
-        final TextParagraph tp = tr.getParagraph();
-        final TextShape sh = (tp != null) ? tp.getParentShape() : null;
+        final TextParagraph<?,?,?> tp = tr.getParagraph();
+        final TextShape<?,?> sh = (tp != null) ? tp.getParentShape() : null;
         final Placeholder ph = (sh != null) ? sh.getPlaceholder() : null;
 
         // 0xB acts like cariage return in page titles and like blank in the others
@@ -438,4 +438,19 @@ public class SlideShowExtractor<
             (italic == null || tr.isItalic() == italic) &&
             (bold == null || tr.isBold() == bold);
     }
+
+    @Override
+    public void setCloseFilesystem(boolean doCloseFilesystem) {
+        this.doCloseFilesystem = doCloseFilesystem;
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return doCloseFilesystem;
+    }
+
+    @Override
+    public SlideShow<S,P> getFilesystem() {
+        return getDocument();
+    }
 }

Modified: poi/trunk/src/java/org/apache/poi/ss/extractor/ExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/ss/extractor/ExcelExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/ss/extractor/ExcelExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/ss/extractor/ExcelExtractor.java Thu Aug 13 21:08:24 2020
@@ -24,39 +24,39 @@ public interface ExcelExtractor {
     /**
      * Should sheet names be included?
      * Default is true
-     * 
+     *
      * @param includeSheetNames {@code true} if the sheet names should be included
      */
-    public void setIncludeSheetNames(boolean includeSheetNames);
+    void setIncludeSheetNames(boolean includeSheetNames);
 
     /**
      * Should we return the formula itself, and not the result it produces?
      * Default is false
-     * 
+     *
      * @param formulasNotResults {@code true} if the formula itself is returned
      */
-    public void setFormulasNotResults(boolean formulasNotResults);
+    void setFormulasNotResults(boolean formulasNotResults);
 
     /**
      * Should headers and footers be included in the output?
      * Default is true
-     * 
+     *
      * @param includeHeadersFooters {@code true} if headers and footers should be included
      */
-    public void setIncludeHeadersFooters(boolean includeHeadersFooters);
+    void setIncludeHeadersFooters(boolean includeHeadersFooters);
 
     /**
      * Should cell comments be included?
      * Default is false
-     * 
+     *
      * @param includeCellComments {@code true} if cell comments should be included
      */
-    public void setIncludeCellComments(boolean includeCellComments);
+    void setIncludeCellComments(boolean includeCellComments);
 
     /**
      * Retrieves the text contents of the file
-     * 
+     *
      * @return the text contents of the file
      */
-    public String getText();
+    String getText();
 }

Modified: poi/trunk/src/multimodule/ooxml/java9/module-info.class
URL: http://svn.apache.org/viewvc/poi/trunk/src/multimodule/ooxml/java9/module-info.class?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
Binary files - no diff available.

Modified: poi/trunk/src/multimodule/ooxml/java9/module-info.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/multimodule/ooxml/java9/module-info.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/multimodule/ooxml/java9/module-info.java (original)
+++ poi/trunk/src/multimodule/ooxml/java9/module-info.java Thu Aug 13 21:08:24 2020
@@ -29,6 +29,7 @@ module org.apache.poi.ooxml {
     requires java.security.jgss;
 
     provides org.apache.poi.ss.usermodel.WorkbookProvider with org.apache.poi.xssf.usermodel.XSSFWorkbookFactory;
+    provides org.apache.poi.extractor.ExtractorProvider with org.apache.poi.ooxml.extractor.POIXMLExtractorFactory;
 
     exports org.apache.poi.xwpf.extractor;
     exports org.apache.poi.xwpf.usermodel;

Modified: poi/trunk/src/multimodule/ooxml/test9/module-info.class
URL: http://svn.apache.org/viewvc/poi/trunk/src/multimodule/ooxml/test9/module-info.class?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
Binary files - no diff available.

Modified: poi/trunk/src/multimodule/ooxml/test9/module-info.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/multimodule/ooxml/test9/module-info.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/multimodule/ooxml/test9/module-info.java (original)
+++ poi/trunk/src/multimodule/ooxml/test9/module-info.java Thu Aug 13 21:08:24 2020
@@ -29,6 +29,7 @@ module org.apache.poi.ooxml {
     requires java.security.jgss;
 
     provides org.apache.poi.ss.usermodel.WorkbookProvider with org.apache.poi.xssf.usermodel.XSSFWorkbookFactory;
+    provides org.apache.poi.extractor.ExtractorProvider with org.apache.poi.ooxml.extractor.POIXMLExtractorFactory;
 
     exports org.apache.poi.xwpf.extractor;
     exports org.apache.poi.xwpf.usermodel;

Modified: poi/trunk/src/multimodule/poi/java9/module-info.class
URL: http://svn.apache.org/viewvc/poi/trunk/src/multimodule/poi/java9/module-info.class?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
Binary files - no diff available.

Modified: poi/trunk/src/multimodule/poi/java9/module-info.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/multimodule/poi/java9/module-info.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/multimodule/poi/java9/module-info.java (original)
+++ poi/trunk/src/multimodule/poi/java9/module-info.java Thu Aug 13 21:08:24 2020
@@ -28,8 +28,12 @@ module org.apache.poi.poi {
     requires jdk.unsupported;
 
     uses org.apache.poi.ss.usermodel.WorkbookProvider;
+    uses org.apache.poi.extractor.ExtractorProvider;
+
 
     provides org.apache.poi.ss.usermodel.WorkbookProvider with org.apache.poi.hssf.usermodel.HSSFWorkbookFactory;
+    provides org.apache.poi.extractor.ExtractorProvider with org.apache.poi.extractor.MainExtractorFactory;
+
 
     exports org.apache.poi;
     exports org.apache.poi.common;

Modified: poi/trunk/src/multimodule/poi/test9/module-info.class
URL: http://svn.apache.org/viewvc/poi/trunk/src/multimodule/poi/test9/module-info.class?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
Binary files - no diff available.

Modified: poi/trunk/src/multimodule/poi/test9/module-info.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/multimodule/poi/test9/module-info.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/multimodule/poi/test9/module-info.java (original)
+++ poi/trunk/src/multimodule/poi/test9/module-info.java Thu Aug 13 21:08:24 2020
@@ -28,8 +28,10 @@ module org.apache.poi.poi {
     requires jdk.unsupported;
 
     uses org.apache.poi.ss.usermodel.WorkbookProvider;
+    uses org.apache.poi.extractor.ExtractorProvider;
 
     provides org.apache.poi.ss.usermodel.WorkbookProvider with org.apache.poi.hssf.usermodel.HSSFWorkbookFactory;
+    provides org.apache.poi.extractor.ExtractorProvider with org.apache.poi.extractor.MainExtractorFactory;
 
     exports org.apache.poi;
     exports org.apache.poi.common;

Modified: poi/trunk/src/multimodule/scratchpad/java9/module-info.class
URL: http://svn.apache.org/viewvc/poi/trunk/src/multimodule/scratchpad/java9/module-info.class?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
Binary files - no diff available.

Modified: poi/trunk/src/multimodule/scratchpad/java9/module-info.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/multimodule/scratchpad/java9/module-info.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/multimodule/scratchpad/java9/module-info.java (original)
+++ poi/trunk/src/multimodule/scratchpad/java9/module-info.java Thu Aug 13 21:08:24 2020
@@ -20,6 +20,8 @@ module org.apache.poi.scratchpad {
     requires java.desktop;
     requires commons.math3;
 
+    provides org.apache.poi.extractor.ExtractorProvider with org.apache.poi.extractor.ole2.OLE2ScratchpadExtractorFactory;
+
     exports org.apache.poi.hmef;
     exports org.apache.poi.hmef.dev;
     exports org.apache.poi.hmef.extractor;

Modified: poi/trunk/src/multimodule/scratchpad/test9/module-info.class
URL: http://svn.apache.org/viewvc/poi/trunk/src/multimodule/scratchpad/test9/module-info.class?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
Binary files - no diff available.

Modified: poi/trunk/src/multimodule/scratchpad/test9/module-info.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/multimodule/scratchpad/test9/module-info.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/multimodule/scratchpad/test9/module-info.java (original)
+++ poi/trunk/src/multimodule/scratchpad/test9/module-info.java Thu Aug 13 21:08:24 2020
@@ -20,6 +20,8 @@ module org.apache.poi.scratchpad {
     requires java.desktop;
     requires commons.math3;
 
+    provides org.apache.poi.extractor.ExtractorProvider with org.apache.poi.extractor.ole2.OLE2ScratchpadExtractorFactory;
+
     exports org.apache.poi.hmef;
     exports org.apache.poi.hmef.dev;
     exports org.apache.poi.hmef.extractor;

Modified: poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/CommandLineTextExtractor.java Thu Aug 13 21:08:24 2020
@@ -18,15 +18,19 @@ package org.apache.poi.ooxml.extractor;
 
 import java.io.File;
 
+import org.apache.poi.extractor.ExtractorFactory;
 import org.apache.poi.extractor.POITextExtractor;
 
 /**
  * A command line wrapper around {@link ExtractorFactory}, useful
  * for when debugging.
  */
-public class CommandLineTextExtractor {
+public final class CommandLineTextExtractor {
     public static final String DIVIDER = "=======================";
 
+    private CommandLineTextExtractor() {
+    }
+
     public static void main(String[] args) throws Exception {
         if (args.length < 1) {
             System.err.println("Use:");

Copied: poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java (from r1880838, poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java)
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java?p2=poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java&p1=poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java&r1=1880838&r2=1880839&rev=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLExtractorFactory.java Thu Aug 13 21:08:24 2020
@@ -19,17 +19,12 @@ package org.apache.poi.ooxml.extractor;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.poi.EncryptedDocumentException;
-import org.apache.poi.extractor.OLE2ExtractorFactory;
-import org.apache.poi.extractor.POIOLE2TextExtractor;
+
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.extractor.ExtractorProvider;
 import org.apache.poi.extractor.POITextExtractor;
-import org.apache.poi.hssf.extractor.ExcelExtractor;
 import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.openxml4j.opc.PackageAccess;
@@ -38,19 +33,11 @@ import org.apache.poi.openxml4j.opc.Pack
 import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
 import org.apache.poi.poifs.crypt.Decryptor;
 import org.apache.poi.poifs.crypt.EncryptionInfo;
-import org.apache.poi.poifs.filesystem.DirectoryEntry;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.Entry;
 import org.apache.poi.poifs.filesystem.FileMagic;
-import org.apache.poi.poifs.filesystem.NotOLE2FileException;
-import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.sl.extractor.SlideShowExtractor;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.NotImplemented;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
 import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
+import org.apache.poi.xslf.extractor.XSLFExtractor;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xslf.usermodel.XSLFRelation;
 import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
@@ -71,20 +58,20 @@ import org.apache.xmlbeans.XmlException;
  *  off switching to <a href="http://tika.apache.org">Apache Tika</a> instead!</p>
  */
 @SuppressWarnings("WeakerAccess")
-public final class ExtractorFactory {
-    private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class);
-
-    public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
+public final class POIXMLExtractorFactory implements ExtractorProvider {
+    private static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
     private static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
     private static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
 
     private static final XSLFRelation[] SUPPORTED_XSLF_TYPES = new XSLFRelation[]{
-            XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE,
-            XSLFRelation.PRESENTATIONML, XSLFRelation.PRESENTATIONML_TEMPLATE,
-            XSLFRelation.PRESENTATION_MACRO
+        XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE,
+        XSLFRelation.PRESENTATIONML, XSLFRelation.PRESENTATIONML_TEMPLATE,
+        XSLFRelation.PRESENTATION_MACRO
     };
 
-    private ExtractorFactory() {
+    @Override
+    public boolean accepts(FileMagic fm) {
+        return fm == FileMagic.OOXML;
     }
 
     /**
@@ -93,7 +80,7 @@ public final class ExtractorFactory {
      * Default is false.
      */
     public static boolean getThreadPrefersEventExtractors() {
-        return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
+        return ExtractorFactory.getThreadPrefersEventExtractors();
     }
 
     /**
@@ -102,7 +89,7 @@ public final class ExtractorFactory {
      * Default is to use the thread level setting, which defaults to false.
      */
     public static Boolean getAllThreadsPreferEventExtractors() {
-        return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
+        return ExtractorFactory.getAllThreadsPreferEventExtractors();
     }
 
     /**
@@ -110,7 +97,7 @@ public final class ExtractorFactory {
      * Will only be used if the All Threads setting is null.
      */
     public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
-         OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
+         ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
     }
 
     /**
@@ -118,7 +105,7 @@ public final class ExtractorFactory {
      * If set, will take preference over the Thread level setting.
      */
     public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
-         OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
+         ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
     }
 
     /**
@@ -126,52 +113,54 @@ public final class ExtractorFactory {
      * Checks the all-threads one first, then thread specific.
      */
     public static boolean getPreferEventExtractor() {
-         return OLE2ExtractorFactory.getPreferEventExtractor();
+         return ExtractorFactory.getPreferEventExtractor();
     }
 
-    @SuppressWarnings("unchecked")
-    public static <T extends POITextExtractor> T createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
-        POIFSFileSystem fs = null;
+    @Override
+    public POITextExtractor create(File f, String password) throws IOException {
+        if (FileMagic.valueOf(f) != FileMagic.OOXML) {
+            return ExtractorFactory.createExtractor(f, password);
+        }
+
+
+        OPCPackage pkg = null;
         try {
-            fs = new POIFSFileSystem(f);
-            if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
-                return (T)createEncryptedOOXMLExtractor(fs);
-            }
-            POITextExtractor extractor = createExtractor(fs);
-            extractor.setFilesystem(fs);
-            return (T)extractor;
-        } catch (OfficeXmlFileException e) {
-            // ensure file-handle release
-            IOUtils.closeQuietly(fs);
-            OPCPackage pkg = OPCPackage.open(f.toString(), PackageAccess.READ);
-            T t = (T)createExtractor(pkg);
-            t.setFilesystem(pkg);
-            return t;
-        } catch (NotOLE2FileException ne) {
-            // ensure file-handle release
-            IOUtils.closeQuietly(fs);
-            throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file", ne);
-        } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) { // NOSONAR
-            // ensure file-handle release
-            IOUtils.closeQuietly(fs);
+            pkg = OPCPackage.open(f.toString(), PackageAccess.READ);
+            POIXMLTextExtractor ex = create(pkg);
+            if (ex == null) {
+                pkg.revert();
+            }
+            return ex;
+        } catch (InvalidFormatException ife) {
+            throw new IOException(ife);
+        } catch (IOException e) {
+            pkg.revert();
             throw e;
         }
     }
 
-    public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException {
+    public POITextExtractor create(InputStream inp, String password) throws IOException {
         InputStream is = FileMagic.prepareToCheckMagic(inp);
 
-        FileMagic fm = FileMagic.valueOf(is);
+        if (FileMagic.valueOf(is) != FileMagic.OOXML) {
+            return ExtractorFactory.createExtractor(is, password);
+        }
 
-        switch (fm) {
-        case OLE2:
-            POIFSFileSystem fs = new POIFSFileSystem(is);
-            boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
-            return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs);
-        case OOXML:
-            return createExtractor(OPCPackage.open(is));
-        default:
-            throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream, found type: " + fm);
+        OPCPackage pkg = null;
+        try {
+            pkg = OPCPackage.open(is);
+            POIXMLTextExtractor ex = create(pkg);
+            if (ex == null) {
+                pkg.revert();
+            }
+            return ex;
+        } catch (InvalidFormatException e) {
+            throw new IOException(e);
+        } catch (RuntimeException | IOException e) {
+            if (pkg != null) {
+                pkg.revert();
+            }
+            throw e;
         }
     }
 
@@ -181,11 +170,9 @@ public final class ExtractorFactory {
      * @param pkg An {@link OPCPackage}.
      * @return A {@link POIXMLTextExtractor} for the given file.
      * @throws IOException If an error occurs while reading the file
-     * @throws OpenXML4JException If an error parsing the OpenXML file format is found.
-     * @throws XmlException If an XML parsing error occurs.
      * @throws IllegalArgumentException If no matching file type could be found.
      */
-    public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
+    public POIXMLTextExtractor create(OPCPackage pkg) throws IOException {
         try {
             // Check for the normal Office core document
             PackageRelationshipCollection core;
@@ -199,8 +186,9 @@ public final class ExtractorFactory {
             if (core.size() == 0) {
                 // Could it be a visio one?
                 core = pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
-                if (core.size() == 1)
+                if (core.size() == 1) {
                     return new XDGFVisioExtractor(pkg);
+                }
             }
 
             // Should just be a single core document, complain if not
@@ -214,7 +202,7 @@ public final class ExtractorFactory {
 
             // Is it XSSF?
             for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
-                if ( rel.getContentType().equals( contentType ) ) {
+                if (rel.getContentType().equals(contentType)) {
                     if (getPreferEventExtractor()) {
                         return new XSSFEventBasedExcelExtractor(pkg);
                     }
@@ -224,21 +212,21 @@ public final class ExtractorFactory {
 
             // Is it XWPF?
             for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
-                if ( rel.getContentType().equals( contentType ) ) {
+                if (rel.getContentType().equals(contentType)) {
                     return new XWPFWordExtractor(pkg);
                 }
             }
 
             // Is it XSLF?
             for (XSLFRelation rel : SUPPORTED_XSLF_TYPES) {
-                if ( rel.getContentType().equals( contentType ) ) {
-                    return new SlideShowExtractor<>(new XMLSlideShow(pkg));
+                if (rel.getContentType().equals(contentType)) {
+                    return new XSLFExtractor(new XMLSlideShow(pkg));
                 }
             }
 
             // special handling for SlideShow-Theme-files,
             if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
-                return new SlideShowExtractor<>(new XMLSlideShow(pkg));
+                return new XSLFExtractor(new XMLSlideShow(pkg));
             }
 
             // How about xlsb?
@@ -248,137 +236,46 @@ public final class ExtractorFactory {
                 }
             }
 
-            throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")");
-
-        } catch (IOException | Error | RuntimeException | XmlException | OpenXML4JException e) { // NOSONAR
-            // ensure that we close the package again if there is an error opening it, however
-            // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
-            pkg.revert();
+            return null;
+        } catch (IOException e) {
             throw e;
+        } catch (Error | RuntimeException | XmlException | OpenXML4JException e) { // NOSONAR
+            throw new IOException(e);
         }
+        // we used to close (revert()) the package here, but this is the caller's responsibility
+        // and we can't reuse the package
     }
 
-    public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
-        return createExtractor(fs.getRoot());
+    public POITextExtractor create(POIFSFileSystem fs) throws IOException {
+        return create(fs.getRoot(), Biff8EncryptionKey.getCurrentUserPassword());
     }
 
-    @SuppressWarnings("unchecked")
-    public static <T extends POITextExtractor> T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
-    {
-        // First, check for OOXML
-        for (String entryName : poifsDir.getEntryNames()) {
-            if (entryName.equals("Package")) {
-                OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
-                return (T)createExtractor(pkg);
+    @Override
+    public POITextExtractor create(DirectoryNode poifsDir, String password) throws IOException {
+        // First, check for plain OOXML package
+        if (poifsDir.hasEntry("Package")) {
+            try (InputStream is = poifsDir.createDocumentInputStream("Package")) {
+                return create(is, password);
             }
         }
 
-        // If not, ask the OLE2 code to check, with Scratchpad if possible
-        return (T)OLE2ExtractorFactory.createExtractor(poifsDir);
-    }
-
-    /**
-     * Returns an array of text extractors, one for each of
-     *  the embedded documents in the file (if there are any).
-     * If there are no embedded documents, you'll get back an
-     *  empty array. Otherwise, you'll get one open
-     *  {@link POITextExtractor} for each embedded file.
-     */
-    public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
-        // All the embedded directories we spotted
-        ArrayList<Entry> dirs = new ArrayList<>();
-        // For anything else not directly held in as a POIFS directory
-        ArrayList<InputStream> nonPOIFS = new ArrayList<>();
-
-        // Find all the embedded directories
-        DirectoryEntry root = ext.getRoot();
-        if (root == null) {
-            throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
-        }
-
-        // provide ExcelExtractor also in OOXML module, because scratchpad is not necessary for it
-        if (ext instanceof ExcelExtractor) {
-            // These are in MBD... under the root
-            Iterator<Entry> it = root.getEntries();
-            while (it.hasNext()) {
-                Entry entry = it.next();
-                if (entry.getName().startsWith("MBD")) {
-                    dirs.add(entry);
-                }
-            }
-        } else {
-            try {
-                Class<?> clazz = Class.forName("org.apache.poi.extractor.ole2.OLE2ScratchpadExtractorFactory");
-                Method m = clazz.getDeclaredMethod("identifyEmbeddedResources", POIOLE2TextExtractor.class, List.class, List.class);
-                m.invoke(null, ext, dirs, nonPOIFS);
-            } catch (ReflectiveOperationException e) {
-                logger.log(POILogger.WARN, "POI Scratchpad jar not included ", e.getLocalizedMessage());
-                return new POITextExtractor[0];
-            }
-        }
-
-        // Create the extractors
-        if (dirs.size() == 0 && nonPOIFS.size() == 0){
-            return new POITextExtractor[0];
-        }
-
-        ArrayList<POITextExtractor> textExtractors = new ArrayList<>();
-        for (Entry dir : dirs) {
-            textExtractors.add(createExtractor((DirectoryNode) dir));
-        }
-        for (InputStream nonPOIF : nonPOIFS) {
+        if (poifsDir.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
+            EncryptionInfo ei = new EncryptionInfo(poifsDir);
+            Decryptor dec = ei.getDecryptor();
             try {
-                 textExtractors.add(createExtractor(nonPOIF));
-            } catch (IllegalArgumentException e) {
-                // Ignore, just means it didn't contain
-                //  a format we support as yet
-                logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage());
-            } catch (XmlException | OpenXML4JException e) {
-                throw new IOException(e.getMessage(), e);
+                if (!dec.verifyPassword(password)) {
+                    throw new IOException("Invalid password specified");
+                }
+                try (InputStream is = dec.getDataStream(poifsDir)) {
+                    return create(is, password);
+                }
+            } catch (IOException e) {
+                throw e;
+            } catch (Exception e) {
+                throw new IOException(e);
             }
         }
-        return textExtractors.toArray(new POITextExtractor[0]);
-    }
 
-    /**
-     * Returns an array of text extractors, one for each of
-     *  the embedded documents in the file (if there are any).
-     * If there are no embedded documents, you'll get back an
-     *  empty array. Otherwise, you'll get one open
-     *  {@link POITextExtractor} for each embedded file.
-     */
-    @NotImplemented
-    @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"})
-    public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) {
-        throw new IllegalStateException("Not yet supported");
-    }
-
-    private static POITextExtractor createEncryptedOOXMLExtractor(POIFSFileSystem fs)
-    throws IOException {
-        String pass = Biff8EncryptionKey.getCurrentUserPassword();
-        if (pass == null) {
-            pass = Decryptor.DEFAULT_PASSWORD;
-        }
-
-        EncryptionInfo ei = new EncryptionInfo(fs);
-        Decryptor dec = ei.getDecryptor();
-        InputStream is = null;
-        try {
-            if (!dec.verifyPassword(pass)) {
-                throw new EncryptedDocumentException("Invalid password specified - use Biff8EncryptionKey.setCurrentUserPassword() before calling extractor");
-            }
-            is = dec.getDataStream(fs);
-            return createExtractor(OPCPackage.open(is));
-        } catch (IOException e) {
-            throw e;
-        } catch (Exception e) {
-            throw new EncryptedDocumentException(e);
-        } finally {
-            IOUtils.closeQuietly(is);
-
-            // also close the POIFSFileSystem here as we read all the data
-            // while decrypting
-            fs.close();
-        }
+        throw new IOException("The OLE2 file neither contained a plain OOXML package node (\"Package\") nor an encrypted one (\"EncryptedPackage\").");
     }
 }

Modified: poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLPropertiesTextExtractor.java Thu Aug 13 21:08:24 2020
@@ -36,9 +36,10 @@ import org.openxmlformats.schemas.office
  * content of the OOXML file properties, eg author
  * and title.
  */
-public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
-
+public class POIXMLPropertiesTextExtractor implements POIXMLTextExtractor {
+    private final POIXMLDocument doc;
     private final DateFormat dateFormat;
+    private boolean doCloseFilesystem = true;
 
     /**
      * Creates a new POIXMLPropertiesTextExtractor for the given open document.
@@ -46,7 +47,7 @@ public class POIXMLPropertiesTextExtract
      * @param doc the given open document
      */
     public POIXMLPropertiesTextExtractor(POIXMLDocument doc) {
-        super(doc);
+        this.doc = doc;
         DateFormatSymbols dfs = DateFormatSymbols.getInstance(Locale.ROOT);
         dateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", dfs);
         dateFormat.setTimeZone(LocaleUtil.TIMEZONE_UTC);
@@ -242,7 +243,7 @@ public class POIXMLPropertiesTextExtract
             }
 
          /*else if (property.isSetArray()) {
-            // TODO Fetch the array values and output 
+            // TODO Fetch the array values and output
          }
          else if (property.isSetVector()) {
             // TODO Fetch the vector values and output
@@ -281,4 +282,24 @@ public class POIXMLPropertiesTextExtract
     public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
         throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
     }
+
+    @Override
+    public POIXMLDocument getDocument() {
+        return doc;
+    }
+
+    @Override
+    public void setCloseFilesystem(boolean doCloseFilesystem) {
+        this.doCloseFilesystem = doCloseFilesystem;
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return doCloseFilesystem;
+    }
+
+    @Override
+    public POIXMLDocument getFilesystem() {
+        return null;
+    }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org