You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ki...@apache.org on 2020/08/13 21:08:25 UTC

svn commit: r1880839 [2/3] - in /poi/trunk/src: integrationtest/org/apache/poi/stress/ java/org/apache/poi/extractor/ java/org/apache/poi/hpsf/extractor/ java/org/apache/poi/hssf/extractor/ java/org/apache/poi/sl/extractor/ java/org/apache/poi/ss/extra...

Modified: poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java Thu Aug 13 21:08:24 2020
@@ -27,61 +27,48 @@ import org.apache.poi.ooxml.POIXMLProper
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.openxml4j.util.ZipSecureFile;
 
-public abstract class POIXMLTextExtractor extends POITextExtractor {
-	/** The POIXMLDocument that's open */
-	private final POIXMLDocument _document;
-
-	/**
-	 * Creates a new text extractor for the given document
-	 * 
-	 * @param document the document to extract from
-	 */
-	public POIXMLTextExtractor(POIXMLDocument document) {
-		_document = document;
-	}
-
+public interface POIXMLTextExtractor extends POITextExtractor {
 	/**
 	 * Returns the core document properties
-	 * 
+	 *
 	 * @return the core document properties
 	 */
-	public CoreProperties getCoreProperties() {
-		 return _document.getProperties().getCoreProperties();
+	default CoreProperties getCoreProperties() {
+		 return getDocument().getProperties().getCoreProperties();
 	}
 	/**
 	 * Returns the extended document properties
-	 * 
+	 *
 	 * @return the extended document properties
 	 */
-	public ExtendedProperties getExtendedProperties() {
-		return _document.getProperties().getExtendedProperties();
+	default ExtendedProperties getExtendedProperties() {
+		return getDocument().getProperties().getExtendedProperties();
 	}
 	/**
 	 * Returns the custom document properties
-	 * 
+	 *
 	 * @return the custom document properties
 	 */
-	public CustomProperties getCustomProperties() {
-		return _document.getProperties().getCustomProperties();
+	default CustomProperties getCustomProperties() {
+		return getDocument().getProperties().getCustomProperties();
 	}
 
 	/**
 	 * Returns opened document
-	 * 
+	 *
 	 * @return the opened document
 	 */
 	@Override
-	public final POIXMLDocument getDocument() {
-		return _document;
-	}
+	POIXMLDocument getDocument();
 
 	/**
 	 * Returns the opened OPCPackage that contains the document
-	 * 
+	 *
 	 * @return the opened OPCPackage
 	 */
-	public OPCPackage getPackage() {
-	   return _document.getPackage();
+	default OPCPackage getPackage() {
+		POIXMLDocument doc = getDocument();
+	   	return doc != null ? doc.getPackage() : null;
 	}
 
 	/**
@@ -89,25 +76,24 @@ public abstract class POIXMLTextExtracto
 	 *  document properties metadata, such as title and author.
 	 */
 	@Override
-    public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
-		return new POIXMLPropertiesTextExtractor(_document);
+    default POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
+		return new POIXMLPropertiesTextExtractor(getDocument());
 	}
 
 	@Override
-	public void close() throws IOException {
+	default void close() throws IOException {
 		// e.g. XSSFEventBaseExcelExtractor passes a null-document
-		if(_document != null) {
+		if (isCloseFilesystem()) {
 			@SuppressWarnings("resource")
-            OPCPackage pkg = _document.getPackage();
-			if(pkg != null) {
+            OPCPackage pkg = getPackage();
+			if (pkg != null) {
 			    // revert the package to not re-write the file, which is very likely not wanted for a TextExtractor!
 				pkg.revert();
 			}
 		}
-		super.close();
 	}
 
-	protected void checkMaxTextSize(CharSequence text, String string) {
+	default void checkMaxTextSize(CharSequence text, String string) {
         if(string == null) {
             return;
         }

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xdgf/extractor/XDGFVisioExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xdgf/extractor/XDGFVisioExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xdgf/extractor/XDGFVisioExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xdgf/extractor/XDGFVisioExtractor.java Thu Aug 13 21:08:24 2020
@@ -18,7 +18,6 @@ package org.apache.poi.xdgf.extractor;
 
 import java.io.IOException;
 
-import org.apache.poi.ooxml.POIXMLDocument;
 import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.xdgf.usermodel.XDGFPage;
@@ -28,12 +27,12 @@ import org.apache.poi.xdgf.usermodel.sha
 /**
  * Helper class to extract text from an OOXML Visio File
  */
-public class XDGFVisioExtractor extends POIXMLTextExtractor {
+public class XDGFVisioExtractor implements POIXMLTextExtractor {
 
     protected final XmlVisioDocument document;
-    
+    private boolean doCloseFilesystem = true;
+
     public XDGFVisioExtractor(XmlVisioDocument document) {
-        super(document);
         this.document = document;
     }
 
@@ -43,25 +42,31 @@ public class XDGFVisioExtractor extends
 
     public String getText() {
         ShapeTextVisitor visitor = new ShapeTextVisitor();
-        
+
         for (XDGFPage page: document.getPages()) {
             page.getContent().visitShapes(visitor);
         }
-        
+
         return visitor.getText();
     }
-    
-    public static void main(String [] args) throws IOException {
-        if (args.length < 1) {
-            System.err.println("Use:");
-            System.err.println("  XDGFVisioExtractor <filename.vsdx>");
-            System.exit(1);
-        }
-        POIXMLTextExtractor extractor =
-                new XDGFVisioExtractor(POIXMLDocument.openPackage(
-                        args[0]
-                ));
-        System.out.println(extractor.getText());
-        extractor.close();
+
+    @Override
+    public XmlVisioDocument getDocument() {
+        return document;
+    }
+
+    @Override
+    public void setCloseFilesystem(boolean doCloseFilesystem) {
+        this.doCloseFilesystem = doCloseFilesystem;
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return doCloseFilesystem;
+    }
+
+    @Override
+    public XmlVisioDocument getFilesystem() {
+        return document;
     }
 }

Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFExtractor.java?rev=1880839&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFExtractor.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFExtractor.java Thu Aug 13 21:08:24 2020
@@ -0,0 +1,45 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xslf.extractor;
+
+import org.apache.poi.ooxml.extractor.POIXMLPropertiesTextExtractor;
+import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFShape;
+import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
+
+
+/**
+ * Helper class to extract text from an OOXML Powerpoint file
+ */
+public class XSLFExtractor extends SlideShowExtractor<XSLFShape, XSLFTextParagraph> implements POIXMLTextExtractor {
+    public XSLFExtractor(XMLSlideShow slideshow) {
+        super(slideshow);
+    }
+
+    @Override
+    public XMLSlideShow getDocument() {
+        return (XMLSlideShow)slideshow;
+    }
+
+    @Override
+    public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
+        return POIXMLTextExtractor.super.getMetadataTextExtractor();
+    }
+}

Propchange: poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java Thu Aug 13 21:08:24 2020
@@ -19,7 +19,6 @@ package org.apache.poi.xssf.extractor;
 import java.io.IOException;
 import java.io.InputStream;
 
-import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.ss.usermodel.DataFormatter;
@@ -43,8 +42,7 @@ import org.xml.sax.SAXException;
  *
  * @since 3.16-beta3
  */
-public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
-        implements org.apache.poi.ss.extractor.ExcelExtractor {
+public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor {
 
     private static final POILogger LOGGER = POILogFactory.getLogger(XSSFBEventBasedExcelExtractor.class);
 
@@ -62,18 +60,6 @@ public class XSSFBEventBasedExcelExtract
         super(container);
     }
 
-    public static void main(String[] args) throws Exception {
-        if (args.length < 1) {
-            System.err.println("Use:");
-            System.err.println("  XSSFBEventBasedExcelExtractor <filename.xlsb>");
-            System.exit(1);
-        }
-        POIXMLTextExtractor extractor =
-                new XSSFBEventBasedExcelExtractor(args[0]);
-        System.out.println(extractor.getText());
-        extractor.close();
-    }
-
     public void setHandleHyperlinksInCells(boolean handleHyperlinksInCells) {
         this.handleHyperlinksInCells = handleHyperlinksInCells;
     }

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java Thu Aug 13 21:08:24 2020
@@ -25,6 +25,7 @@ import java.util.Map;
 
 import javax.xml.parsers.ParserConfigurationException;
 
+import org.apache.poi.ooxml.POIXMLDocument;
 import org.apache.poi.ooxml.POIXMLProperties;
 import org.apache.poi.ooxml.POIXMLProperties.CoreProperties;
 import org.apache.poi.ooxml.POIXMLProperties.CustomProperties;
@@ -57,13 +58,13 @@ import org.xml.sax.XMLReader;
  * Implementation of a text extractor from OOXML Excel
  * files that uses SAX event based parsing.
  */
-public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
-        implements org.apache.poi.ss.extractor.ExcelExtractor {
+public class XSSFEventBasedExcelExtractor
+    implements POIXMLTextExtractor, org.apache.poi.ss.extractor.ExcelExtractor {
 
     private static final POILogger LOGGER = POILogFactory.getLogger(XSSFEventBasedExcelExtractor.class);
 
-    protected OPCPackage container;
-    protected POIXMLProperties properties;
+    protected final OPCPackage container;
+    protected final POIXMLProperties properties;
 
     protected Locale locale;
     protected boolean includeTextBoxes = true;
@@ -73,29 +74,17 @@ public class XSSFEventBasedExcelExtracto
     protected boolean formulasNotResults;
     protected boolean concatenatePhoneticRuns = true;
 
+    private boolean doCloseFilesystem = true;
+
     public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
         this(OPCPackage.open(path));
     }
 
     public XSSFEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
-        super(null);
         this.container = container;
-
         properties = new POIXMLProperties(container);
     }
 
-    public static void main(String[] args) throws Exception {
-        if (args.length < 1) {
-            System.err.println("Use:");
-            System.err.println("  XSSFEventBasedExcelExtractor <filename.xlsx>");
-            System.exit(1);
-        }
-        POIXMLTextExtractor extractor =
-                new XSSFEventBasedExcelExtractor(args[0]);
-        System.out.println(extractor.getText());
-        extractor.close();
-    }
-
     /**
      * Should sheet names be included? Default is true
      */
@@ -319,12 +308,23 @@ public class XSSFEventBasedExcelExtracto
     }
 
     @Override
-    public void close() throws IOException {
-        if (container != null) {
-            container.close();
-            container = null;
-        }
-        super.close();
+    public POIXMLDocument getDocument() {
+        return null;
+    }
+
+    @Override
+    public void setCloseFilesystem(boolean doCloseFilesystem) {
+        this.doCloseFilesystem = doCloseFilesystem;
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return doCloseFilesystem;
+    }
+
+    @Override
+    public OPCPackage getFilesystem() {
+        return container;
     }
 
     protected class SheetTextExtractor implements SheetContentsHandler {

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java Thu Aug 13 21:08:24 2020
@@ -20,8 +20,8 @@ import java.io.IOException;
 import java.util.Iterator;
 import java.util.Locale;
 
-import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.ss.usermodel.Cell;
@@ -44,8 +44,8 @@ import org.apache.xmlbeans.XmlException;
 /**
  * Helper class to extract text from an OOXML Excel file
  */
-public class XSSFExcelExtractor extends POIXMLTextExtractor 
-       implements org.apache.poi.ss.extractor.ExcelExtractor {
+public class XSSFExcelExtractor
+       implements POIXMLTextExtractor, org.apache.poi.ss.extractor.ExcelExtractor {
     public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
         XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK,
         XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK,
@@ -53,34 +53,21 @@ public class XSSFExcelExtractor extends
     };
 
     private Locale locale;
-    private XSSFWorkbook workbook;
+    private final XSSFWorkbook workbook;
     private boolean includeSheetNames = true;
     private boolean formulasNotResults;
     private boolean includeCellComments;
     private boolean includeHeadersFooters = true;
     private boolean includeTextBoxes = true;
+    private boolean doCloseFilesystem = true;
 
     public XSSFExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
         this(new XSSFWorkbook(container));
     }
     public XSSFExcelExtractor(XSSFWorkbook workbook) {
-        super(workbook);
         this.workbook = workbook;
     }
 
-    public static void main(String[] args) throws Exception {
-        if(args.length < 1) {
-            System.err.println("Use:");
-            System.err.println("  XSSFExcelExtractor <filename.xlsx>");
-            System.exit(1);
-        }
-
-        try (OPCPackage pkg = OPCPackage.create(args[0]);
-             POIXMLTextExtractor extractor = new XSSFExcelExtractor(pkg)) {
-            System.out.println(extractor.getText());
-        }
-    }
-
     /**
      * Should sheet names be included? Default is true
      */
@@ -194,7 +181,7 @@ public class XSSFExcelExtractor extends
                 }
                 text.append("\n");
             }
-            
+
             // add textboxes
             if (includeTextBoxes){
                 XSSFDrawing drawing = sheet.getDrawingPatriarch();
@@ -262,4 +249,24 @@ public class XSSFExcelExtractor extends
     private String extractHeaderFooter(HeaderFooter hf) {
         return ExcelExtractor._extractHeaderFooter(hf);
     }
+
+    @Override
+    public XSSFWorkbook getDocument() {
+        return workbook;
+    }
+
+    @Override
+    public void setCloseFilesystem(boolean doCloseFilesystem) {
+        this.doCloseFilesystem = doCloseFilesystem;
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return doCloseFilesystem;
+    }
+
+    @Override
+    public XSSFWorkbook getFilesystem() {
+        return workbook;
+    }
 }

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java Thu Aug 13 21:08:24 2020
@@ -19,9 +19,7 @@ package org.apache.poi.xwpf.extractor;
 import java.io.IOException;
 import java.util.List;
 
-import org.apache.poi.ooxml.POIXMLDocument;
 import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
 import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
@@ -39,46 +37,31 @@ import org.apache.poi.xwpf.usermodel.XWP
 import org.apache.poi.xwpf.usermodel.XWPFTable;
 import org.apache.poi.xwpf.usermodel.XWPFTableCell;
 import org.apache.poi.xwpf.usermodel.XWPFTableRow;
-import org.apache.xmlbeans.XmlException;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
 
 /**
  * Helper class to extract text from an OOXML Word file
  */
-public class XWPFWordExtractor extends POIXMLTextExtractor {
+public class XWPFWordExtractor implements POIXMLTextExtractor {
     public static final XWPFRelation[] SUPPORTED_TYPES = {
             XWPFRelation.DOCUMENT, XWPFRelation.TEMPLATE,
             XWPFRelation.MACRO_DOCUMENT,
             XWPFRelation.MACRO_TEMPLATE_DOCUMENT
     };
 
-    private XWPFDocument document;
+    private final XWPFDocument document;
     private boolean fetchHyperlinks;
     private boolean concatenatePhoneticRuns = true;
+    private boolean doCloseFilesystem = true;
 
-    public XWPFWordExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
+    public XWPFWordExtractor(OPCPackage container) throws IOException {
         this(new XWPFDocument(container));
     }
 
     public XWPFWordExtractor(XWPFDocument document) {
-        super(document);
         this.document = document;
     }
 
-    public static void main(String[] args) throws Exception {
-        if (args.length < 1) {
-            System.err.println("Use:");
-            System.err.println("  XWPFWordExtractor <filename.docx>");
-            System.exit(1);
-        }
-        POIXMLTextExtractor extractor =
-                new XWPFWordExtractor(POIXMLDocument.openPackage(
-                        args[0]
-                ));
-        System.out.println(extractor.getText());
-        extractor.close();
-    }
-
     /**
      * Should we also fetch the hyperlinks, when fetching
      * the text content? Default is to only output the
@@ -217,4 +200,24 @@ public class XWPFWordExtractor extends P
             text.append(hfPolicy.getDefaultHeader().getText());
         }
     }
+
+    @Override
+    public XWPFDocument getDocument() {
+        return document;
+    }
+
+    @Override
+    public void setCloseFilesystem(boolean doCloseFilesystem) {
+        this.doCloseFilesystem = doCloseFilesystem;
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return doCloseFilesystem;
+    }
+
+    @Override
+    public XWPFDocument getFilesystem() {
+        return document;
+    }
 }

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java Thu Aug 13 21:08:24 2020
@@ -31,23 +31,25 @@ import java.util.Locale;
 
 import org.apache.poi.POIDataSamples;
 import org.apache.poi.UnsupportedFileFormatException;
+import org.apache.poi.extractor.ExtractorFactory;
 import org.apache.poi.extractor.POIOLE2TextExtractor;
 import org.apache.poi.extractor.POITextExtractor;
 import org.apache.poi.hssf.HSSFTestDataSamples;
-import org.apache.poi.hssf.OldExcelFormatException;
 import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
-import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
+import org.apache.poi.ooxml.extractor.POIXMLExtractorFactory;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.openxml4j.opc.PackageAccess;
 import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.poifs.filesystem.NotOLE2FileException;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
 import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
 import org.apache.xmlbeans.XmlException;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.ExpectedException;
 
 /**
  * Test that the extractor factory plays nicely
@@ -89,6 +91,8 @@ public class TestExtractorFactory {
     private static POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
     private static File pub = getFileAndCheck(pubTests, "Simple.pub");
 
+    private static final POIXMLExtractorFactory xmlFactory = new POIXMLExtractorFactory();
+
     private static File getFileAndCheck(POIDataSamples samples, String name) {
         File file = samples.getFile(name);
 
@@ -110,7 +114,7 @@ public class TestExtractorFactory {
         "Word 6", doc6, "Word6Extractor", 20,
         "Word 95", doc95, "Word6Extractor", 120,
         "PowerPoint", ppt, "SlideShowExtractor", 120,
-        "PowerPoint - pptx", pptx, "SlideShowExtractor", 120,
+        "PowerPoint - pptx", pptx, "XSLFExtractor", 120,
         "Visio", vsd, "VisioTextExtractor", 50,
         "Visio - vsdx", vsdx, "XDGFVisioExtractor", 20,
         "Publisher", pub, "PublisherTextExtractor", 50,
@@ -125,6 +129,8 @@ public class TestExtractorFactory {
         R apply(T t) throws IOException, OpenXML4JException, XmlException;
     }
 
+    @Rule
+    public ExpectedException thrown = ExpectedException.none();
 
     @Test
     public void testFile() throws Exception {
@@ -135,12 +141,12 @@ public class TestExtractorFactory {
         }
     }
 
-    @Test(expected = IllegalArgumentException.class)
+    @Test
     public void testFileInvalid() throws Exception {
+        thrown.expectMessage("Can't create extractor - unsupported file type: UNKNOWN");
+        thrown.expect(IOException.class);
         // Text
-        try (POITextExtractor ignored = ExtractorFactory.createExtractor(txt)) {
-            fail("extracting from invalid package");
-        }
+        ExtractorFactory.createExtractor(txt);
     }
 
     @Test
@@ -148,8 +154,10 @@ public class TestExtractorFactory {
         testStream(ExtractorFactory::createExtractor, true);
     }
 
-    @Test(expected = IllegalArgumentException.class)
+    @Test
     public void testInputStreamInvalid() throws Exception {
+        thrown.expectMessage("Can't create extractor - unsupported file type: UNKNOWN");
+        thrown.expect(IOException.class);
         testInvalid(ExtractorFactory::createExtractor);
     }
 
@@ -158,8 +166,10 @@ public class TestExtractorFactory {
         testStream((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)), false);
     }
 
-    @Test(expected = IOException.class)
+    @Test
     public void testPOIFSInvalid() throws Exception {
+        thrown.expectMessage("Invalid header signature; read 0x3D20726F68747541, expected 0xE11AB1A1E011CFD0");
+        thrown.expect(NotOLE2FileException.class);
         testInvalid((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)));
     }
 
@@ -195,9 +205,7 @@ public class TestExtractorFactory {
              POITextExtractor ignored = poifs.apply(fis)) {
             fail("extracting from invalid package");
         } catch (IllegalArgumentException e) {
-            assertTrue("Had: " + e,
-                    e.getMessage().contains(FileMagic.UNKNOWN.name()));
-
+            assertTrue("Had: " + e, e.getMessage().contains(FileMagic.UNKNOWN.name()));
             throw e;
         }
     }
@@ -211,7 +219,7 @@ public class TestExtractorFactory {
             }
 
             try (final OPCPackage pkg = OPCPackage.open(testFile, PackageAccess.READ);
-                 final POITextExtractor ext = ExtractorFactory.createExtractor(pkg)) {
+                 final POITextExtractor ext = xmlFactory.create(pkg)) {
                 testExtractor(ext, (String) TEST_SET[i], (String) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
                 pkg.revert();
             }
@@ -222,7 +230,7 @@ public class TestExtractorFactory {
     public void testPackageInvalid() throws Exception {
         // Text
         try (final OPCPackage pkg = OPCPackage.open(txt, PackageAccess.READ);
-             final POITextExtractor ignored = ExtractorFactory.createExtractor(pkg)) {
+             final POITextExtractor ignored = xmlFactory.create(pkg)) {
             fail("extracting from invalid package");
         }
     }
@@ -251,61 +259,45 @@ public class TestExtractorFactory {
         assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
         assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
 
+        try {
+            // Check we get the right extractors now
+            try (POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))) {
+                assertTrue(extractor instanceof EventBasedExcelExtractor);
+            }
+            try (POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))) {
+                assertTrue(extractor.getText().length() > 200);
+            }
 
-        // Check we get the right extractors now
-        POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
-        assertTrue(
-                extractor
-                instanceof EventBasedExcelExtractor
-        );
-        extractor.close();
-        extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
-        assertTrue(
-                extractor.getText().length() > 200
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
-        assertTrue(extractor instanceof XSSFEventBasedExcelExtractor);
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
-        assertTrue(
-                extractor.getText().length() > 200
-        );
-        extractor.close();
+            try (POITextExtractor extractor = xmlFactory.create(OPCPackage.open(xlsx.toString(), PackageAccess.READ))) {
+                assertTrue(extractor instanceof XSSFEventBasedExcelExtractor);
+            }
 
+            try (POITextExtractor extractor = xmlFactory.create(OPCPackage.open(xlsx.toString(), PackageAccess.READ))) {
+                assertTrue(extractor.getText().length() > 200);
+            }
+        } finally {
+            // Put back to normal
+            ExtractorFactory.setThreadPrefersEventExtractors(false);
+        }
 
-        // Put back to normal
-        ExtractorFactory.setThreadPrefersEventExtractors(false);
         assertFalse(ExtractorFactory.getPreferEventExtractor());
         assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
         assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
 
         // And back
-        extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
-        assertTrue(
-                extractor
-                instanceof ExcelExtractor
-        );
-        extractor.close();
-        extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
-        assertTrue(
-                extractor.getText().length() > 200
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
-        assertTrue(
-                extractor
-                instanceof XSSFExcelExtractor
-        );
-        extractor.close();
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
-        assertTrue(
-                extractor.getText().length() > 200
-        );
-        extractor.close();
+        try (POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))) {
+            assertTrue(extractor instanceof ExcelExtractor);
+        }
+        try (POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))) {
+            assertTrue(extractor.getText().length() > 200);
+        }
+
+        try (POITextExtractor extractor = xmlFactory.create(OPCPackage.open(xlsx.toString(), PackageAccess.READ))) {
+            assertTrue(extractor instanceof XSSFExcelExtractor);
+        }
+        try (POITextExtractor extractor = xmlFactory.create(OPCPackage.open(xlsx.toString()))) {
+            assertTrue(extractor.getText().length() > 200);
+        }
     }
 
     /**
@@ -325,7 +317,7 @@ public class TestExtractorFactory {
         };
 
         for (int i=0; i<testObj.length; i+=3) {
-            try (final POIOLE2TextExtractor ext = ExtractorFactory.createExtractor((File)testObj[i+1])) {
+            try (final POIOLE2TextExtractor ext = (POIOLE2TextExtractor)ExtractorFactory.createExtractor((File)testObj[i+1])) {
                 final POITextExtractor[] embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext);
 
                 int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX = 0;
@@ -443,13 +435,13 @@ public class TestExtractorFactory {
         "spreadsheet/WithChartSheet.xlsx",
         "spreadsheet/chart_sheet.xlsx",
     };
-    
+
     @Test
     public void testFileLeak() {
-        // run a number of files that might fail in order to catch 
+        // run a number of files that might fail in order to catch
         // leaked file resources when using file-leak-detector while
         // running the test
-        
+
         for(String file : EXPECTED_FAILURES) {
             try {
                 ExtractorFactory.createExtractor(POIDataSamples.getSpreadSheetInstance().getFile(file));
@@ -458,21 +450,23 @@ public class TestExtractorFactory {
             }
         }
     }
-    
+
     /**
-     *  #59074 - Excel 95 files should give a helpful message, not just 
+     *  #59074 - Excel 95 files should give a helpful message, not just
      *   "No supported documents found in the OLE2 stream"
      */
-    @Test(expected = OldExcelFormatException.class)
+    @Test
     public void bug59074() throws Exception {
-        ExtractorFactory.createExtractor(
-                POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
+        try (POITextExtractor extractor = ExtractorFactory.createExtractor(POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"))) {
+            String text = extractor.getText();
+            assertContains(text, "testdoc");
+        }
     }
 
     @Test(expected = IllegalStateException.class)
-    public void testGetEmbeddedFromXMLExtractor() {
+    public void testGetEmbeddedFromXMLExtractor() throws IOException {
         // currently not implemented
-        ExtractorFactory.getEmbeddedDocsTextExtractors((POIXMLTextExtractor)null);
+        ExtractorFactory.getEmbeddedDocsTextExtractors(null);
     }
 
     // This bug is currently open. This test will fail with "expected error not thrown" when the bug has been fixed.

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java Thu Aug 13 21:08:24 2020
@@ -60,9 +60,9 @@ import org.apache.poi.EncryptedDocumentE
 import org.apache.poi.POIDataSamples;
 import org.apache.poi.POITestCase;
 import org.apache.poi.UnsupportedFileFormatException;
+import org.apache.poi.extractor.ExtractorFactory;
 import org.apache.poi.extractor.POITextExtractor;
 import org.apache.poi.ooxml.POIXMLException;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
 import org.apache.poi.ooxml.util.DocumentHelper;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
@@ -836,7 +836,7 @@ public final class TestPackage {
 
 	@Test
 	public void testZipEntityExpansionExceedsMemory() throws IOException, OpenXML4JException, XmlException {
-		expectedEx.expect(POIXMLException.class);
+		expectedEx.expect(IOException.class);
 		expectedEx.expectMessage("unable to parse shared strings table");
 		expectedEx.expectCause(getCauseMatcher(SAXParseException.class, "The parser has encountered more than"));
 		openXmlBombFile("poc-xmlbomb.xlsx");
@@ -844,7 +844,7 @@ public final class TestPackage {
 
 	@Test
 	public void testZipEntityExpansionExceedsMemory2() throws IOException, OpenXML4JException, XmlException {
-		expectedEx.expect(POIXMLException.class);
+		expectedEx.expect(IOException.class);
 		expectedEx.expectMessage("unable to parse shared strings table");
 		expectedEx.expectCause(getCauseMatcher(SAXParseException.class, "The parser has encountered more than"));
     	openXmlBombFile("poc-xmlbomb-empty.xlsx");

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/tests/TestHxxFEncryption.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/tests/TestHxxFEncryption.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/tests/TestHxxFEncryption.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/tests/TestHxxFEncryption.java Thu Aug 13 21:08:24 2020
@@ -35,14 +35,12 @@ import java.util.Collection;
 
 import org.apache.poi.POIDataSamples;
 import org.apache.poi.POIDocument;
+import org.apache.poi.extractor.ExtractorFactory;
 import org.apache.poi.extractor.POITextExtractor;
 import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.poifs.crypt.EncryptionInfo;
 import org.apache.poi.poifs.crypt.cryptoapi.CryptoAPIEncryptionHeader;
 import org.apache.poi.poifs.storage.RawDataUtil;
-import org.apache.xmlbeans.XmlException;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -91,7 +89,7 @@ public class TestHxxFEncryption {
     }
 
     @Test
-    public void extract() throws IOException, OpenXML4JException, XmlException {
+    public void extract() throws IOException {
         File f = sampleDir.getFile(file);
         Biff8EncryptionKey.setCurrentUserPassword(password);
         try (POITextExtractor te = ExtractorFactory.createExtractor(f)) {
@@ -103,16 +101,16 @@ public class TestHxxFEncryption {
     }
 
     @Test
-    public void changePassword() throws IOException, OpenXML4JException, XmlException {
+    public void changePassword() throws IOException {
         newPassword("test");
     }
 
     @Test
-    public void removePassword() throws IOException, OpenXML4JException, XmlException {
+    public void removePassword() throws IOException {
         newPassword(null);
     }
 
-    private void newPassword(String newPass) throws IOException, OpenXML4JException, XmlException {
+    private void newPassword(String newPass) throws IOException {
         File f = sampleDir.getFile(file);
         Biff8EncryptionKey.setCurrentUserPassword(password);
         try (POITextExtractor te1 = ExtractorFactory.createExtractor(f)) {
@@ -133,7 +131,7 @@ public class TestHxxFEncryption {
 
     /** changing the encryption mode and key size in poor mans style - see comments below */
     @Test
-    public void changeEncryption() throws IOException, OpenXML4JException, XmlException {
+    public void changeEncryption() throws IOException {
         File f = sampleDir.getFile(file);
         ByteArrayOutputStream bos = new ByteArrayOutputStream();
         Biff8EncryptionKey.setCurrentUserPassword(password);
@@ -157,7 +155,7 @@ public class TestHxxFEncryption {
                  POIDocument doc = (POIDocument) te3.getDocument()) {
                 // need to cache data (i.e. read all data) before changing the key size
                 Class<?> clazz = doc.getClass();
-                if ("HSLFSlideShowImpl".equals(clazz.getSimpleName())) {
+                if ("HSLFSlideShow".equals(clazz.getSimpleName())) {
                     try {
                         clazz.getDeclaredMethod("getPictureData").invoke(doc);
                     } catch (ReflectiveOperationException e) {

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java Thu Aug 13 21:08:24 2020
@@ -522,7 +522,7 @@ public class TestXSLFBugs {
     private String getSlideText(XMLSlideShow ppt, XSLFSlide slide) throws IOException {
         try (SlideShowExtractor<XSLFShape,XSLFTextParagraph> extr = new SlideShowExtractor<>(ppt)) {
             // do not auto-close the slideshow
-            extr.setFilesystem(null);
+            extr.setCloseFilesystem(false);
             extr.setSlidesByDefault(true);
             extr.setNotesByDefault(false);
             extr.setMasterByDefault(false);

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java Thu Aug 13 21:08:24 2020
@@ -29,20 +29,18 @@ import java.io.IOException;
 import java.io.InputStream;
 
 import org.apache.poi.POIDataSamples;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.extractor.ExtractorFactory;
 import org.apache.poi.sl.extractor.SlideShowExtractor;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xslf.usermodel.XSLFShape;
 import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
-import org.apache.xmlbeans.XmlException;
 import org.junit.Test;
 
 /**
  * Tests for XSLFPowerPointExtractor
  */
 public class TestXSLFPowerPointExtractor {
-    private static POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
+    private static final POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
 
 	/**
 	 * Get text out of the simple file
@@ -262,10 +260,11 @@ public class TestXSLFPowerPointExtractor
     }
 
     @Test
-    public void test45541() throws IOException, OpenXML4JException, XmlException {
+    public void test45541() throws IOException {
         // extract text from a powerpoint that has a header in the notes-element
         final File headerFile = slTests.getFile("45541_Header.pptx");
-        try (final SlideShowExtractor extr = ExtractorFactory.createExtractor(headerFile)) {
+        //noinspection rawtypes
+        try (final SlideShowExtractor extr = (SlideShowExtractor) ExtractorFactory.createExtractor(headerFile)) {
             String text = extr.getText();
             assertNotNull(text);
             assertFalse("Had: " + text, text.contains("testdoc"));
@@ -280,7 +279,8 @@ public class TestXSLFPowerPointExtractor
 
         // extract text from a powerpoint that has a footer in the master-slide
         final File footerFile = slTests.getFile("45541_Footer.pptx");
-        try (SlideShowExtractor extr = ExtractorFactory.createExtractor(footerFile)) {
+        //noinspection rawtypes
+        try (SlideShowExtractor extr = (SlideShowExtractor)ExtractorFactory.createExtractor(footerFile)) {
             String text = extr.getText();
             assertNotContained(text, "testdoc");
 

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java Thu Aug 13 21:08:24 2020
@@ -16,7 +16,7 @@
 ==================================================================== */
 package org.apache.poi.xssf.extractor;
 
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
+import org.apache.poi.extractor.ExtractorFactory;
 import org.apache.poi.hssf.HSSFTestDataSamples;
 import org.junit.After;
 
@@ -27,7 +27,7 @@ public class TestXSSFEventBasedExcelExtr
 		ExtractorFactory.setAllThreadsPreferEventExtractors(true);
 		return (XSSFEventBasedExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName));
 	}
-	
+
 	@After
     public void tearDown() {
 	    // reset setting to not affect other tests

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java Thu Aug 13 21:08:24 2020
@@ -17,8 +17,8 @@
 
 package org.apache.poi.xssf.extractor;
 
+import org.apache.poi.extractor.ExtractorFactory;
 import org.apache.poi.hssf.HSSFTestDataSamples;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
 import org.junit.After;
 
 /**

Added: poi/trunk/src/resources/main/META-INF/services/org.apache.poi.extractor.ExtractorProvider
URL: http://svn.apache.org/viewvc/poi/trunk/src/resources/main/META-INF/services/org.apache.poi.extractor.ExtractorProvider?rev=1880839&view=auto
==============================================================================
--- poi/trunk/src/resources/main/META-INF/services/org.apache.poi.extractor.ExtractorProvider (added)
+++ poi/trunk/src/resources/main/META-INF/services/org.apache.poi.extractor.ExtractorProvider Thu Aug 13 21:08:24 2020
@@ -0,0 +1,18 @@
+# ====================================================================
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+# ====================================================================
+
+org.apache.poi.extractor.MainExtractorFactory
\ No newline at end of file

Copied: poi/trunk/src/resources/ooxml/META-INF/services/org.apache.poi.extractor.ExtractorProvider (from r1880689, poi/trunk/src/resources/main/META-INF/services/org.apache.poi.ss.usermodel.WorkbookProvider)
URL: http://svn.apache.org/viewvc/poi/trunk/src/resources/ooxml/META-INF/services/org.apache.poi.extractor.ExtractorProvider?p2=poi/trunk/src/resources/ooxml/META-INF/services/org.apache.poi.extractor.ExtractorProvider&p1=poi/trunk/src/resources/main/META-INF/services/org.apache.poi.ss.usermodel.WorkbookProvider&r1=1880689&r2=1880839&rev=1880839&view=diff
==============================================================================
--- poi/trunk/src/resources/main/META-INF/services/org.apache.poi.ss.usermodel.WorkbookProvider (original)
+++ poi/trunk/src/resources/ooxml/META-INF/services/org.apache.poi.extractor.ExtractorProvider Thu Aug 13 21:08:24 2020
@@ -15,4 +15,4 @@
 #  limitations under the License.
 # ====================================================================
 
-org.apache.poi.hssf.usermodel.HSSFWorkbookFactory
\ No newline at end of file
+org.apache.poi.ooxml.extractor.POIXMLExtractorFactory
\ No newline at end of file

Added: poi/trunk/src/resources/scratchpad/META-INF/services/org.apache.poi.extractor.ExtractorProvider
URL: http://svn.apache.org/viewvc/poi/trunk/src/resources/scratchpad/META-INF/services/org.apache.poi.extractor.ExtractorProvider?rev=1880839&view=auto
==============================================================================
--- poi/trunk/src/resources/scratchpad/META-INF/services/org.apache.poi.extractor.ExtractorProvider (added)
+++ poi/trunk/src/resources/scratchpad/META-INF/services/org.apache.poi.extractor.ExtractorProvider Thu Aug 13 21:08:24 2020
@@ -0,0 +1,18 @@
+# ====================================================================
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+# ====================================================================
+
+org.apache.poi.extractor.ole2.OLE2ScratchpadExtractorFactory
\ No newline at end of file

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java Thu Aug 13 21:08:24 2020
@@ -17,44 +17,66 @@
 package org.apache.poi.extractor.ole2;
 
 import java.io.ByteArrayInputStream;
+import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.Iterator;
 import java.util.List;
+import java.util.stream.StreamSupport;
 
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.extractor.ExtractorProvider;
 import org.apache.poi.extractor.POIOLE2TextExtractor;
 import org.apache.poi.extractor.POITextExtractor;
-import org.apache.poi.extractor.OLE2ExtractorFactory;
 import org.apache.poi.hdgf.extractor.VisioTextExtractor;
 import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
+import org.apache.poi.hslf.usermodel.HSLFShape;
 import org.apache.poi.hslf.usermodel.HSLFSlideShow;
+import org.apache.poi.hslf.usermodel.HSLFTextParagraph;
 import org.apache.poi.hsmf.MAPIMessage;
 import org.apache.poi.hsmf.datatypes.AttachmentChunks;
 import org.apache.poi.hsmf.extractor.OutlookTextExtractor;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
 import org.apache.poi.hwpf.OldWordFileFormatException;
 import org.apache.poi.hwpf.extractor.Word6Extractor;
 import org.apache.poi.hwpf.extractor.WordExtractor;
 import org.apache.poi.poifs.filesystem.DirectoryEntry;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.sl.extractor.SlideShowExtractor;
 import org.apache.poi.sl.usermodel.SlideShowFactory;
 import org.apache.poi.util.POILogFactory;
 import org.apache.poi.util.POILogger;
 
 /**
- * Scratchpad-specific logic for {@link OLE2ExtractorFactory} and
- *  {@link org.apache.poi.extractor.ExtractorFactory}, which permit the other two to run with
+ * Scratchpad-specific logic for {@link ExtractorFactory},
+ *  which permits that factory to run with
  *  no Scratchpad jar (though without functionality!)
- * <p>Note - should not be used standalone, always use via the other
- *  two classes</p>
+ * <p>Note - should not be used standalone, always use it via
+ *  {@link ExtractorFactory}</p>
  */
 @SuppressWarnings("WeakerAccess")
-public class OLE2ScratchpadExtractorFactory {
+public class OLE2ScratchpadExtractorFactory implements ExtractorProvider {
     private static final POILogger logger = POILogFactory.getLogger(OLE2ScratchpadExtractorFactory.class);
 
+    @Override
+    public boolean accepts(FileMagic fm) {
+        return FileMagic.OLE2 == fm;
+    }
+
+    @Override
+    public POITextExtractor create(File file, String password) throws IOException {
+        return create(new POIFSFileSystem(file, true).getRoot(), password);
+    }
+
+    @Override
+    public POITextExtractor create(InputStream inputStream, String password) throws IOException {
+        return create(new POIFSFileSystem(inputStream).getRoot(), password);
+    }
+
     /**
      * Look for certain entries in the stream, to figure it
      * out what format is desired
@@ -66,48 +88,54 @@ public class OLE2ScratchpadExtractorFact
      *
-     * @throws IOException when the format specific extraction fails because of invalid entires
+     * @throws IOException when the format specific extraction fails because of invalid entries
      */
-    public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException {
-        if (poifsDir.hasEntry("WordDocument")) {
-            // Old or new style word document?
-            try {
-                return new WordExtractor(poifsDir);
-            } catch (OldWordFileFormatException e) {
-                return new Word6Extractor(poifsDir);
+    public POITextExtractor create(DirectoryNode poifsDir, String password) throws IOException {
+        final String oldPW = Biff8EncryptionKey.getCurrentUserPassword();
+        try {
+            Biff8EncryptionKey.setCurrentUserPassword(password);
+            if (poifsDir.hasEntry("WordDocument")) {
+                // Old or new style word document?
+                try {
+                    return new WordExtractor(poifsDir);
+                } catch (OldWordFileFormatException e) {
+                    return new Word6Extractor(poifsDir);
+                }
             }
-        }
 
-        if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
-            return new SlideShowExtractor(SlideShowFactory.create(poifsDir));
-        }
+            if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
+                return new SlideShowExtractor<HSLFShape, HSLFTextParagraph>(SlideShowFactory.create(poifsDir));
+            }
 
-        if (poifsDir.hasEntry("VisioDocument")) {
-            return new VisioTextExtractor(poifsDir);
-        }
+            if (poifsDir.hasEntry("VisioDocument")) {
+                return new VisioTextExtractor(poifsDir);
+            }
 
-        if (poifsDir.hasEntry("Quill")) {
-            return new PublisherTextExtractor(poifsDir);
-        }
+            if (poifsDir.hasEntry("Quill")) {
+                return new PublisherTextExtractor(poifsDir);
+            }
 
-        final String[] outlookEntryNames = new String[] {
-                // message bodies, saved as plain text (PtypString)
-                // The first short (0x1000, 0x0047, 0x0037) refer to the Property ID (see [MS-OXPROPS].pdf)
-                // the second short (0x001e, 0x001f, 0x0102) refer to the type of data stored in this entry
-                // https://msdn.microsoft.com/endatatypes.Ex-us/library/cc433490(v=exchg.80).aspx
-                // @see org.apache.poi.hsmf.Types.MAPIType
-                "__substg1.0_1000001E", //PidTagBody ASCII
-                "__substg1.0_1000001F", //PidTagBody Unicode
-                "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII
-                "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode
-                "__substg1.0_0037001E", //PidTagSubject ASCII
-                "__substg1.0_0037001F", //PidTagSubject Unicode
-        };
-        for (String entryName : outlookEntryNames) {
-            if (poifsDir.hasEntry(entryName)) {
-                return new OutlookTextExtractor(poifsDir);
+            final String[] outlookEntryNames = new String[]{
+                    // message bodies, saved as plain text (PtypString)
+                    // The first short (0x1000, 0x0047, 0x0037) refer to the Property ID (see [MS-OXPROPS].pdf)
+                    // the second short (0x001e, 0x001f, 0x0102) refer to the type of data stored in this entry
+                    // https://msdn.microsoft.com/endatatypes.Ex-us/library/cc433490(v=exchg.80).aspx
+                    // @see org.apache.poi.hsmf.Types.MAPIType
+                    "__substg1.0_1000001E", //PidTagBody ASCII
+                    "__substg1.0_1000001F", //PidTagBody Unicode
+                    "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII
+                    "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode
+                    "__substg1.0_0037001E", //PidTagSubject ASCII
+                    "__substg1.0_0037001F", //PidTagSubject Unicode
+            };
+            for (String entryName : outlookEntryNames) {
+                if (poifsDir.hasEntry(entryName)) {
+                    return new OutlookTextExtractor(poifsDir);
+                }
             }
+        } finally {
+            Biff8EncryptionKey.setCurrentUserPassword(oldPW);
         }
 
-        throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
+        return null;
     }
 
     /**
@@ -120,10 +148,9 @@ public class OLE2ScratchpadExtractorFact
      * @param ext the extractor holding the directory to start parsing
      * @param dirs a list to be filled with directory references holding embedded
      * @param nonPOIFS a list to be filled with streams which aren't based on POIFS entries
-     *
-     * @throws IOException when the format specific extraction fails because of invalid entires
      */
-    public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
+    @Override
+    public void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) {
         // Find all the embedded directories
         DirectoryEntry root = ext.getRoot();
         if (root == null) {
@@ -132,25 +159,16 @@ public class OLE2ScratchpadExtractorFact
 
         if (ext instanceof ExcelExtractor) {
             // These are in MBD... under the root
-            Iterator<Entry> it = root.getEntries();
-            while (it.hasNext()) {
-                Entry entry = it.next();
-                if (entry.getName().startsWith("MBD")) {
-                    dirs.add(entry);
-                }
-            }
+            StreamSupport.stream(root.spliterator(), false)
+                .filter(entry -> entry.getName().startsWith("MBD"))
+                .forEach(dirs::add);
         } else if (ext instanceof WordExtractor) {
             // These are in ObjectPool -> _... under the root
             try {
-                DirectoryEntry op = (DirectoryEntry)
-                        root.getEntry("ObjectPool");
-                Iterator<Entry> it = op.getEntries();
-                while(it.hasNext()) {
-                    Entry entry = it.next();
-                    if(entry.getName().startsWith("_")) {
-                        dirs.add(entry);
-                    }
-                }
+                DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool");
+                StreamSupport.stream(op.spliterator(), false)
+                    .filter(entry -> entry.getName().startsWith("_"))
+                    .forEach(dirs::add);
             } catch(FileNotFoundException e) {
                 logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage());
                 // ignored here

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java Thu Aug 13 21:08:24 2020
@@ -17,7 +17,6 @@
 
 package org.apache.poi.hdgf.extractor;
 
-import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
@@ -38,11 +37,11 @@ import org.apache.poi.poifs.filesystem.P
  * Can operate on the command line (outputs to stdout), or
  *  can return the text for you (example: for use with Lucene).
  */
-public final class VisioTextExtractor extends POIOLE2TextExtractor {
+public final class VisioTextExtractor implements POIOLE2TextExtractor {
 	private HDGFDiagram hdgf;
+	private boolean doCloseFilesystem = true;
 
 	public VisioTextExtractor(HDGFDiagram hdgf) {
-		super(hdgf);
 		this.hdgf = hdgf;
 	}
 	public VisioTextExtractor(POIFSFileSystem fs) throws IOException {
@@ -91,9 +90,7 @@ public final class VisioTextExtractor ex
 						// Capture the text, as long as it isn't
 						//  simply an empty string
 						String str = cmd.getValue().toString();
-						if(str.isEmpty() || "\n".equals(str)) {
-							// Ignore empty strings
-						} else {
+						if (!(str.isEmpty() || "\n".equals(str))) {
 							text.add( str );
 						}
 					}
@@ -121,21 +118,23 @@ public final class VisioTextExtractor ex
 		return text.toString();
 	}
 
-	public static void main(String[] args) throws Exception {
-		if(args.length == 0) {
-			System.err.println("Use:");
-			System.err.println("   VisioTextExtractor <file.vsd>");
-			System.exit(1);
-		}
+	@Override
+	public HDGFDiagram getDocument() {
+		return hdgf;
+	}
 
-		try (FileInputStream fis = new FileInputStream(args[0])) {
-			VisioTextExtractor extractor =
-				new VisioTextExtractor(fis);
+	@Override
+	public void setCloseFilesystem(boolean doCloseFilesystem) {
+		this.doCloseFilesystem = doCloseFilesystem;
+	}
 
-			// Print not PrintLn as already has \n added to it
-			System.out.print(extractor.getText());
+	@Override
+	public boolean isCloseFilesystem() {
+		return doCloseFilesystem;
+	}
 
-			extractor.close();
-		}
+	@Override
+	public HDGFDiagram getFilesystem() {
+		return hdgf;
 	}
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java Thu Aug 13 21:08:24 2020
@@ -17,35 +17,37 @@
 
 package org.apache.poi.hpbf.extractor;
 
-import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 
 import org.apache.poi.extractor.POIOLE2TextExtractor;
 import org.apache.poi.hpbf.HPBFDocument;
 import org.apache.poi.hpbf.model.qcbits.QCBit;
-import org.apache.poi.hpbf.model.qcbits.QCTextBit;
 import org.apache.poi.hpbf.model.qcbits.QCPLCBit.Type12;
+import org.apache.poi.hpbf.model.qcbits.QCTextBit;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 
 /**
  * Extract text from HPBF Publisher files
  */
-public final class PublisherTextExtractor extends POIOLE2TextExtractor {
-   private HPBFDocument doc;
+public final class PublisherTextExtractor implements POIOLE2TextExtractor {
+   private final HPBFDocument doc;
    private boolean hyperlinksByDefault;
+   private boolean doCloseFilesystem = true;
 
    public PublisherTextExtractor(HPBFDocument doc) {
-      super(doc);
       this.doc = doc;
    }
+
    public PublisherTextExtractor(DirectoryNode dir) throws IOException {
       this(new HPBFDocument(dir));
    }
+
    public PublisherTextExtractor(POIFSFileSystem fs) throws IOException {
       this(new HPBFDocument(fs));
    }
+
    public PublisherTextExtractor(InputStream is) throws IOException {
       this(new POIFSFileSystem(is));
    }
@@ -66,7 +68,7 @@ public final class PublisherTextExtracto
         // Get the text from the Quill Contents
         QCBit[] bits = doc.getQuillContents().getBits();
         for (QCBit bit1 : bits) {
-            if (bit1 != null && bit1 instanceof QCTextBit) {
+            if (bit1 instanceof QCTextBit) {
                 QCTextBit t = (QCTextBit) bit1;
                 text.append(t.getText().replace('\r', '\n'));
             }
@@ -79,7 +81,7 @@ public final class PublisherTextExtracto
         //  how to tie that together.
         if(hyperlinksByDefault) {
             for (QCBit bit : bits) {
-                if (bit != null && bit instanceof Type12) {
+                if (bit instanceof Type12) {
                     Type12 hyperlinks = (Type12) bit;
                     for (int j = 0; j < hyperlinks.getNumberOfHyperlinks(); j++) {
                         text.append("<");
@@ -96,19 +98,23 @@ public final class PublisherTextExtracto
         return text.toString();
     }
 
+    @Override
+    public HPBFDocument getDocument() {
+        return doc;
+    }
+
+    @Override
+    public void setCloseFilesystem(boolean doCloseFilesystem) {
+        this.doCloseFilesystem = doCloseFilesystem;
+    }
 
-    public static void main(String[] args) throws Exception {
-        if(args.length == 0) {
-            System.err.println("Use:");
-            System.err.println("  PublisherTextExtractor <file.pub>");
-        }
+    @Override
+    public boolean isCloseFilesystem() {
+        return doCloseFilesystem;
+    }
 
-        for (String arg : args) {
-            try (FileInputStream fis = new FileInputStream(arg)) {
-                PublisherTextExtractor te = new PublisherTextExtractor(fis);
-                System.out.println(te.getText());
-                te.close();
-            }
-        }
+    @Override
+    public HPBFDocument getFilesystem() {
+        return doc;
     }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java Thu Aug 13 21:08:24 2020
@@ -33,6 +33,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.function.Supplier;
 
+import org.apache.poi.POIDocument;
 import org.apache.poi.common.usermodel.GenericRecord;
 import org.apache.poi.common.usermodel.fonts.FontInfo;
 import org.apache.poi.ddf.EscherBSERecord;
@@ -40,6 +41,9 @@ import org.apache.poi.ddf.EscherContaine
 import org.apache.poi.ddf.EscherOptRecord;
 import org.apache.poi.hpsf.ClassID;
 import org.apache.poi.hpsf.ClassIDPredefined;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.PropertySet;
+import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
 import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
 import org.apache.poi.hslf.exceptions.HSLFException;
@@ -47,6 +51,7 @@ import org.apache.poi.hslf.model.Headers
 import org.apache.poi.hslf.model.MovieShape;
 import org.apache.poi.hslf.record.*;
 import org.apache.poi.hslf.record.SlideListWithText.SlideAtomsSet;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.Ole10Native;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@@ -66,7 +71,7 @@ import org.apache.poi.util.Units;
  * TODO: - figure out how to match notes to their correct sheet (will involve
  * understanding DocSlideList and DocNotesList) - handle Slide creation cleaner
  */
-public final class HSLFSlideShow implements SlideShow<HSLFShape,HSLFTextParagraph>, Closeable, GenericRecord {
+public final class HSLFSlideShow extends POIDocument implements SlideShow<HSLFShape,HSLFTextParagraph>, Closeable, GenericRecord {
 
 	//arbitrarily selected; may need to increase
 	private static final int MAX_RECORD_LENGTH = 10_000_000;
@@ -111,6 +116,8 @@ public final class HSLFSlideShow impleme
 	 * @param hslfSlideShow the HSLFSlideShow to base on
 	 */
 	public HSLFSlideShow(HSLFSlideShowImpl hslfSlideShow) {
+		super(hslfSlideShow.getDirectory());
+
 	    loadSavePhase.set(LoadSavePhase.INIT);
 
 	    // Get useful things from our base slideshow
@@ -1080,7 +1087,7 @@ public final class HSLFSlideShow impleme
     public HPSFPropertiesExtractor getMetadataTextExtractor() {
         return new HPSFPropertiesExtractor(getSlideShowImpl());
     }
-	
+
 	int addToObjListAtom(RecordContainer exObj) {
 		ExObjList lst = getDocumentRecord().getExObjList(true);
 		ExObjListAtom objAtom = lst.getExObjListAtom();
@@ -1097,7 +1104,7 @@ public final class HSLFSlideShow impleme
     	Map<String,ClassID> olemap = new HashMap<>();
     	olemap.put(POWERPOINT_DOCUMENT, ClassIDPredefined.POWERPOINT_V8.getClassID());
     	// as per BIFF8 spec
-    	olemap.put("Workbook", ClassIDPredefined.EXCEL_V8.getClassID()); 
+    	olemap.put("Workbook", ClassIDPredefined.EXCEL_V8.getClassID());
     	// Typically from third party programs
     	olemap.put("WORKBOOK", ClassIDPredefined.EXCEL_V8.getClassID());
     	// Typically odd Crystal Reports exports
@@ -1179,4 +1186,94 @@ public final class HSLFSlideShow impleme
 	public List<? extends GenericRecord> getGenericChildren() {
 		return Arrays.asList(_hslfSlideShow.getRecords());
 	}
+
+	@Override
+	public void write() throws IOException {
+		getSlideShowImpl().write();
+	}
+
+	@Override
+	public void write(File newFile) throws IOException {
+		getSlideShowImpl().write(newFile);
+	}
+
+	@Override
+	public DocumentSummaryInformation getDocumentSummaryInformation() {
+		return getSlideShowImpl().getDocumentSummaryInformation();
+	}
+
+	@Override
+	public SummaryInformation getSummaryInformation() {
+		return getSlideShowImpl().getSummaryInformation();
+	}
+
+	@Override
+	public void createInformationProperties() {
+		getSlideShowImpl().createInformationProperties();
+	}
+
+	@Override
+	public void readProperties() {
+		getSlideShowImpl().readProperties();
+	}
+
+	@Override
+	protected PropertySet getPropertySet(String setName) throws IOException {
+		return getSlideShowImpl().getPropertySetImpl(setName);
+	}
+
+	@Override
+	protected PropertySet getPropertySet(String setName, EncryptionInfo encryptionInfo) throws IOException {
+		return getSlideShowImpl().getPropertySetImpl(setName, encryptionInfo);
+	}
+
+	@Override
+	protected void writeProperties() throws IOException {
+		getSlideShowImpl().writePropertiesImpl();
+	}
+
+	@Override
+	public void writeProperties(POIFSFileSystem outFS) throws IOException {
+		getSlideShowImpl().writeProperties(outFS);
+	}
+
+	@Override
+	protected void writeProperties(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
+		getSlideShowImpl().writePropertiesImpl(outFS, writtenEntries);
+	}
+
+	@Override
+	protected void validateInPlaceWritePossible() throws IllegalStateException {
+		getSlideShowImpl().validateInPlaceWritePossibleImpl();
+	}
+
+	@Override
+	public DirectoryNode getDirectory() {
+		return getSlideShowImpl().getDirectory();
+	}
+
+	@Override
+	protected void clearDirectory() {
+		getSlideShowImpl().clearDirectoryImpl();
+	}
+
+	@Override
+	protected boolean initDirectory() {
+		return getSlideShowImpl().initDirectoryImpl();
+	}
+
+	@Override
+	protected void replaceDirectory(DirectoryNode newDirectory) {
+		getSlideShowImpl().replaceDirectoryImpl(newDirectory);
+	}
+
+	@Override
+	protected String getEncryptedPropertyStreamName() {
+		return getSlideShowImpl().getEncryptedPropertyStreamName();
+	}
+
+	@Override
+	public EncryptionInfo getEncryptionInfo() throws IOException {
+		return getSlideShowImpl().getEncryptionInfo();
+	}
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java Thu Aug 13 21:08:24 2020
@@ -36,6 +36,7 @@ import java.util.NavigableMap;
 import java.util.TreeMap;
 
 import org.apache.poi.POIDocument;
+import org.apache.poi.hpsf.PropertySet;
 import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
 import org.apache.poi.hslf.exceptions.HSLFException;
 import org.apache.poi.hslf.exceptions.OldPowerPointFormatException;
@@ -714,8 +715,6 @@ public final class HSLFSlideShowImpl ext
     }
 
 
-
-
     /* ******************* adding methods follow ********************* */
 
     /**
@@ -850,6 +849,38 @@ public final class HSLFSlideShowImpl ext
         return "EncryptedSummary";
     }
 
+    void writePropertiesImpl() throws IOException {
+        super.writeProperties();
+    }
+
+    PropertySet getPropertySetImpl(String setName) throws IOException {
+        return super.getPropertySet(setName);
+    }
+
+    PropertySet getPropertySetImpl(String setName, EncryptionInfo encryptionInfo) throws IOException {
+        return super.getPropertySet(setName, encryptionInfo);
+    }
+
+    void writePropertiesImpl(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
+        super.writeProperties(outFS, writtenEntries);
+    }
+
+    void validateInPlaceWritePossibleImpl() throws IllegalStateException {
+        super.validateInPlaceWritePossible();
+    }
+
+    void clearDirectoryImpl() {
+        super.clearDirectory();
+    }
+
+    boolean initDirectoryImpl() {
+        return super.initDirectory();
+    }
+
+    void replaceDirectoryImpl(DirectoryNode newDirectory) {
+        super.replaceDirectory(newDirectory);
+    }
+
     private static class BufAccessBAOS extends ByteArrayOutputStream {
         public byte[] getBuf() {
             return buf;

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java Thu Aug 13 21:08:24 2020
@@ -42,9 +42,12 @@ import org.apache.poi.util.LocaleUtil;
  *
  * @since 4.1.2
  */
-public class OutlookTextExtractor extends POIOLE2TextExtractor {
+public class OutlookTextExtractor implements POIOLE2TextExtractor {
+    private final MAPIMessage msg;
+    private boolean doCloseFilesystem = true;
+
     public OutlookTextExtractor(MAPIMessage msg) {
-        super(msg);
+        this.msg = msg;
     }
 
     public OutlookTextExtractor(DirectoryNode poifsDir) throws IOException {
@@ -76,14 +79,13 @@ public class OutlookTextExtractor extend
      * Returns the underlying MAPI message
      */
     public MAPIMessage getMAPIMessage() {
-        return (MAPIMessage) document;
+        return msg;
     }
 
     /**
      * Outputs something a little like a RFC822 email
      */
     public String getText() {
-        MAPIMessage msg = (MAPIMessage) document;
         StringBuilder s = new StringBuilder();
 
         // See if we can get a suitable encoding for any
@@ -201,4 +203,24 @@ public class OutlookTextExtractor extend
         }
         s.append("\n");
     }
+
+    @Override
+    public MAPIMessage getDocument() {
+        return msg;
+    }
+
+    @Override
+    public void setCloseFilesystem(boolean doCloseFilesystem) {
+        this.doCloseFilesystem = doCloseFilesystem;
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return doCloseFilesystem;
+    }
+
+    @Override
+    public MAPIMessage getFilesystem() {
+        return msg;
+    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java Thu Aug 13 21:08:24 2020
@@ -31,13 +31,14 @@ import org.apache.poi.poifs.filesystem.P
  * Class to extract the text from old (Word 6 / Word 95) Word Documents.
  *
  * This should only be used on the older files, for most uses you
- *  should call {@link WordExtractor} which deals properly 
+ *  should call {@link WordExtractor} which deals properly
  *  with HWPF.
  *
  * @author Nick Burch
  */
-public final class Word6Extractor extends POIOLE2TextExtractor {
+public final class Word6Extractor implements POIOLE2TextExtractor {
 	private HWPFOldDocument doc;
+	private boolean doCloseFilesystem = true;
 
 	/**
 	 * Create a new Word Extractor
@@ -49,12 +50,11 @@ public final class Word6Extractor extend
 
     /**
      * Create a new Word Extractor
-     * 
+     *
      * @param fs
      *            POIFSFileSystem containing the word file
      */
-    public Word6Extractor( POIFSFileSystem fs ) throws IOException
-    {
+    public Word6Extractor( POIFSFileSystem fs ) throws IOException {
         this( fs.getRoot() );
     }
 
@@ -62,14 +62,11 @@ public final class Word6Extractor extend
      * @deprecated Use {@link #Word6Extractor(DirectoryNode)} instead
      */
     @Deprecated
-    public Word6Extractor( DirectoryNode dir, POIFSFileSystem fs )
-            throws IOException
-    {
+    public Word6Extractor( DirectoryNode dir, POIFSFileSystem fs ) throws IOException {
         this( dir );
     }
 
-    public Word6Extractor( DirectoryNode dir ) throws IOException
-    {
+    public Word6Extractor( DirectoryNode dir ) throws IOException {
         this( new HWPFOldDocument( dir ) );
     }
 
@@ -78,7 +75,6 @@ public final class Word6Extractor extend
 	 * @param doc The HWPFOldDocument to extract from
 	 */
 	public Word6Extractor(HWPFOldDocument doc) {
-		super(doc);
 		this.doc = doc;
 	}
 
@@ -101,7 +97,7 @@ public final class Word6Extractor extend
 	        ret = new String[doc.getTextTable().getTextPieces().size()];
 	        for(int i=0; i<ret.length; i++) {
 	            ret[i] = doc.getTextTable().getTextPieces().get(i).getStringBuilder().toString();
-	            
+
 	            // Fix the line endings
 	            ret[i] = ret[i].replaceAll("\r", "\ufffe");
 	            ret[i] = ret[i].replaceAll("\ufffe","\r\n");
@@ -111,25 +107,40 @@ public final class Word6Extractor extend
 	    return ret;
 	}
 
-    public String getText()
-    {
-        try
-        {
+    public String getText() {
+        try {
             WordToTextConverter wordToTextConverter = new WordToTextConverter();
             wordToTextConverter.processDocument( doc );
             return wordToTextConverter.getText();
-        }
-        catch ( Exception exc )
-        {
+        } catch ( Exception exc ) {
             // fall-back
             StringBuilder text = new StringBuilder();
 
-            for ( String t : getParagraphText() )
-            {
+            for ( String t : getParagraphText() ) {
                 text.append( t );
             }
 
             return text.toString();
         }
     }
+
+    @Override
+    public HWPFOldDocument getDocument() {
+        return doc;
+    }
+
+    @Override
+    public void setCloseFilesystem(boolean doCloseFilesystem) {
+        this.doCloseFilesystem = doCloseFilesystem;
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return doCloseFilesystem;
+    }
+
+    @Override
+    public HWPFOldDocument getFilesystem() {
+        return doc;
+    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java Thu Aug 13 21:08:24 2020
@@ -17,7 +17,6 @@
 
 package org.apache.poi.hwpf.extractor;
 
-import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 
@@ -39,8 +38,9 @@ import org.apache.poi.poifs.filesystem.P
  *
  * @author Nick Burch
  */
-public final class WordExtractor extends POIOLE2TextExtractor {
-    private HWPFDocument doc;
+public final class WordExtractor implements POIOLE2TextExtractor {
+    private final HWPFDocument doc;
+    private boolean doCloseFilesystem = true;
 
     /**
      * Create a new Word Extractor
@@ -73,30 +73,10 @@ public final class WordExtractor extends
      *            The HWPFDocument to extract from
      */
     public WordExtractor( HWPFDocument doc ) {
-        super( doc );
         this.doc = doc;
     }
 
     /**
-     * Command line extractor, so people will stop moaning that they can't just
-     * run this.
-     */
-    public static void main( String[] args ) throws IOException {
-        if ( args.length == 0 ) {
-            System.err.println( "Use:" );
-            System.err
-                    .println( "   java org.apache.poi.hwpf.extractor.WordExtractor <filename>" );
-            System.exit( 1 );
-        }
-
-        // Process the first argument as a file
-        InputStream fin = new FileInputStream( args[0] );
-        try (WordExtractor extractor = new WordExtractor(fin)) {
-            System.out.println(extractor.getText());
-        }
-    }
-
-    /**
      * Get the text from the word file, as an array with one String per
      * paragraph
      */
@@ -142,7 +122,7 @@ public final class WordExtractor extends
         return getParagraphText( r );
     }
 
-    protected static String[] getParagraphText( Range r ) {
+    static String[] getParagraphText( Range r ) {
         String[] ret;
         ret = new String[r.numParagraphs()];
         for ( int i = 0; i < ret.length; i++ ) {
@@ -287,8 +267,27 @@ public final class WordExtractor extends
     /**
      * Removes any fields (eg macros, page markers etc) from the string.
      */
-    public static String stripFields( String text )
-    {
+    public static String stripFields( String text ) {
         return Range.stripFields( text );
     }
+
+    @Override
+    public HWPFDocument getDocument() {
+        return doc;
+    }
+
+    @Override
+    public void setCloseFilesystem(boolean doCloseFilesystem) {
+        this.doCloseFilesystem = doCloseFilesystem;
+    }
+
+    @Override
+    public boolean isCloseFilesystem() {
+        return doCloseFilesystem;
+    }
+
+    @Override
+    public HWPFDocument getFilesystem() {
+        return doc;
+    }
 }

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java Thu Aug 13 21:08:24 2020
@@ -19,12 +19,9 @@ package org.apache.poi.hdgf.extractor;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
 
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.PrintStream;
 
 import org.apache.poi.POIDataSamples;
 import org.apache.poi.hdgf.HDGFDiagram;
@@ -32,7 +29,7 @@ import org.apache.poi.poifs.filesystem.P
 import org.junit.Test;
 
 public final class TestVisioExtractor {
-    private static POIDataSamples _dgTests = POIDataSamples.getDiagramInstance();
+    private static final POIDataSamples _dgTests = POIDataSamples.getDiagramInstance();
 
 	private final String defFilename = "Test_Visio-Some_Random_Text.vsd";
 	private final int defTextChunks = 5;
@@ -63,7 +60,7 @@ public final class TestVisioExtractor {
         is3.close();
         HDGFDiagram hdgf3 = new HDGFDiagram(poifs3);
 
-		
+
         VisioTextExtractor extractor3 = new VisioTextExtractor(hdgf3);
 		assertNotNull(extractor3);
 		assertNotNull(extractor3.getAllText());
@@ -97,7 +94,7 @@ public final class TestVisioExtractor {
     @Test
 	public void testProblemFiles() throws Exception {
 		String[] files = {
-		      "44594.vsd", "44594-2.vsd", 
+		      "44594.vsd", "44594-2.vsd",
 		      "ShortChunk1.vsd", "ShortChunk2.vsd", "ShortChunk3.vsd",
 		      "NegativeChunkLength.vsd", "NegativeChunkLength2.vsd"
 		};
@@ -108,31 +105,6 @@ public final class TestVisioExtractor {
         }
 	}
 
-    @Test
-	public void testMain() throws Exception {
-		PrintStream oldOut = System.out;
-		ByteArrayOutputStream baos = new ByteArrayOutputStream();
-		PrintStream capture = new PrintStream(baos);
-		System.setOut(capture);
-
-        String path = _dgTests.getFile(defFilename).getPath();
-        VisioTextExtractor.main(new String[] {path});
-
-		// Put things back
-		System.setOut(oldOut);
-
-		// Check
-		capture.flush();
-		String text = baos.toString();
-        // YK: stdout can contain lots of other stuff if logging is sent to console
-        // ( -Dorg.apache.poi.util.POILogger=org.apache.poi.util.SystemOutLogger)
-		assertTrue( text.contains(
-		      "text\nView\n" +
-		      "Test View\nI am a test view\n" +
-		      "Some random text, on a page\n"
-		      ));
-	}
-    
     private VisioTextExtractor openExtractor(String fileName) throws IOException {
         try (InputStream is = _dgTests.openResourceAsStream(fileName)) {
             return new VisioTextExtractor(is);



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org