You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ki...@apache.org on 2020/08/13 21:08:25 UTC
svn commit: r1880839 [2/3] - in /poi/trunk/src:
integrationtest/org/apache/poi/stress/ java/org/apache/poi/extractor/
java/org/apache/poi/hpsf/extractor/ java/org/apache/poi/hssf/extractor/
java/org/apache/poi/sl/extractor/ java/org/apache/poi/ss/extra...
Modified: poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/ooxml/extractor/POIXMLTextExtractor.java Thu Aug 13 21:08:24 2020
@@ -27,61 +27,48 @@ import org.apache.poi.ooxml.POIXMLProper
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.util.ZipSecureFile;
-public abstract class POIXMLTextExtractor extends POITextExtractor {
- /** The POIXMLDocument that's open */
- private final POIXMLDocument _document;
-
- /**
- * Creates a new text extractor for the given document
- *
- * @param document the document to extract from
- */
- public POIXMLTextExtractor(POIXMLDocument document) {
- _document = document;
- }
-
+public interface POIXMLTextExtractor extends POITextExtractor {
/**
* Returns the core document properties
- *
+ *
* @return the core document properties
*/
- public CoreProperties getCoreProperties() {
- return _document.getProperties().getCoreProperties();
+ default CoreProperties getCoreProperties() {
+ return getDocument().getProperties().getCoreProperties();
}
/**
* Returns the extended document properties
- *
+ *
* @return the extended document properties
*/
- public ExtendedProperties getExtendedProperties() {
- return _document.getProperties().getExtendedProperties();
+ default ExtendedProperties getExtendedProperties() {
+ return getDocument().getProperties().getExtendedProperties();
}
/**
* Returns the custom document properties
- *
+ *
* @return the custom document properties
*/
- public CustomProperties getCustomProperties() {
- return _document.getProperties().getCustomProperties();
+ default CustomProperties getCustomProperties() {
+ return getDocument().getProperties().getCustomProperties();
}
/**
* Returns the opened document
- *
+ *
* @return the opened document
*/
@Override
- public final POIXMLDocument getDocument() {
- return _document;
- }
+ POIXMLDocument getDocument();
/**
* Returns the opened OPCPackage that contains the document
- *
+ *
* @return the opened OPCPackage
*/
- public OPCPackage getPackage() {
- return _document.getPackage();
+ default OPCPackage getPackage() {
+ POIXMLDocument doc = getDocument();
+ return doc != null ? doc.getPackage() : null;
}
/**
@@ -89,25 +76,24 @@ public abstract class POIXMLTextExtracto
* document properties metadata, such as title and author.
*/
@Override
- public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
- return new POIXMLPropertiesTextExtractor(_document);
+ default POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
+ return new POIXMLPropertiesTextExtractor(getDocument());
}
@Override
- public void close() throws IOException {
+ default void close() throws IOException {
// e.g. XSSFEventBasedExcelExtractor returns a null document
- if(_document != null) {
+ if (isCloseFilesystem()) {
@SuppressWarnings("resource")
- OPCPackage pkg = _document.getPackage();
- if(pkg != null) {
+ OPCPackage pkg = getPackage();
+ if (pkg != null) {
// revert the package to not re-write the file, which is very likely not wanted for a TextExtractor!
pkg.revert();
}
}
- super.close();
}
- protected void checkMaxTextSize(CharSequence text, String string) {
+ default void checkMaxTextSize(CharSequence text, String string) {
if(string == null) {
return;
}
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xdgf/extractor/XDGFVisioExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xdgf/extractor/XDGFVisioExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xdgf/extractor/XDGFVisioExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xdgf/extractor/XDGFVisioExtractor.java Thu Aug 13 21:08:24 2020
@@ -18,7 +18,6 @@ package org.apache.poi.xdgf.extractor;
import java.io.IOException;
-import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xdgf.usermodel.XDGFPage;
@@ -28,12 +27,12 @@ import org.apache.poi.xdgf.usermodel.sha
/**
* Helper class to extract text from an OOXML Visio File
*/
-public class XDGFVisioExtractor extends POIXMLTextExtractor {
+public class XDGFVisioExtractor implements POIXMLTextExtractor {
protected final XmlVisioDocument document;
-
+ private boolean doCloseFilesystem = true;
+
public XDGFVisioExtractor(XmlVisioDocument document) {
- super(document);
this.document = document;
}
@@ -43,25 +42,31 @@ public class XDGFVisioExtractor extends
public String getText() {
ShapeTextVisitor visitor = new ShapeTextVisitor();
-
+
for (XDGFPage page: document.getPages()) {
page.getContent().visitShapes(visitor);
}
-
+
return visitor.getText();
}
-
- public static void main(String [] args) throws IOException {
- if (args.length < 1) {
- System.err.println("Use:");
- System.err.println(" XDGFVisioExtractor <filename.vsdx>");
- System.exit(1);
- }
- POIXMLTextExtractor extractor =
- new XDGFVisioExtractor(POIXMLDocument.openPackage(
- args[0]
- ));
- System.out.println(extractor.getText());
- extractor.close();
+
+ @Override
+ public XmlVisioDocument getDocument() {
+ return document;
+ }
+
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
+
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
+
+ @Override
+ public XmlVisioDocument getFilesystem() {
+ return document;
}
}
Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFExtractor.java?rev=1880839&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFExtractor.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFExtractor.java Thu Aug 13 21:08:24 2020
@@ -0,0 +1,45 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xslf.extractor;
+
+import org.apache.poi.ooxml.extractor.POIXMLPropertiesTextExtractor;
+import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFShape;
+import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
+
+
+/**
+ * Helper class to extract text from an OOXML PowerPoint file
+ */
+public class XSLFExtractor extends SlideShowExtractor<XSLFShape, XSLFTextParagraph> implements POIXMLTextExtractor {
+ public XSLFExtractor(XMLSlideShow slideshow) {
+ super(slideshow);
+ }
+
+ @Override
+ public XMLSlideShow getDocument() {
+ return (XMLSlideShow)slideshow;
+ }
+
+ @Override
+ public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
+ return POIXMLTextExtractor.super.getMetadataTextExtractor();
+ }
+}
Propchange: poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFExtractor.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java Thu Aug 13 21:08:24 2020
@@ -19,7 +19,6 @@ package org.apache.poi.xssf.extractor;
import java.io.IOException;
import java.io.InputStream;
-import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
@@ -43,8 +42,7 @@ import org.xml.sax.SAXException;
*
* @since 3.16-beta3
*/
-public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
- implements org.apache.poi.ss.extractor.ExcelExtractor {
+public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor {
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFBEventBasedExcelExtractor.class);
@@ -62,18 +60,6 @@ public class XSSFBEventBasedExcelExtract
super(container);
}
- public static void main(String[] args) throws Exception {
- if (args.length < 1) {
- System.err.println("Use:");
- System.err.println(" XSSFBEventBasedExcelExtractor <filename.xlsb>");
- System.exit(1);
- }
- POIXMLTextExtractor extractor =
- new XSSFBEventBasedExcelExtractor(args[0]);
- System.out.println(extractor.getText());
- extractor.close();
- }
-
public void setHandleHyperlinksInCells(boolean handleHyperlinksInCells) {
this.handleHyperlinksInCells = handleHyperlinksInCells;
}
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java Thu Aug 13 21:08:24 2020
@@ -25,6 +25,7 @@ import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
+import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.ooxml.POIXMLProperties;
import org.apache.poi.ooxml.POIXMLProperties.CoreProperties;
import org.apache.poi.ooxml.POIXMLProperties.CustomProperties;
@@ -57,13 +58,13 @@ import org.xml.sax.XMLReader;
* Implementation of a text extractor from OOXML Excel
* files that uses SAX event based parsing.
*/
-public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
- implements org.apache.poi.ss.extractor.ExcelExtractor {
+public class XSSFEventBasedExcelExtractor
+ implements POIXMLTextExtractor, org.apache.poi.ss.extractor.ExcelExtractor {
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFEventBasedExcelExtractor.class);
- protected OPCPackage container;
- protected POIXMLProperties properties;
+ protected final OPCPackage container;
+ protected final POIXMLProperties properties;
protected Locale locale;
protected boolean includeTextBoxes = true;
@@ -73,29 +74,17 @@ public class XSSFEventBasedExcelExtracto
protected boolean formulasNotResults;
protected boolean concatenatePhoneticRuns = true;
+ private boolean doCloseFilesystem = true;
+
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
this(OPCPackage.open(path));
}
public XSSFEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
- super(null);
this.container = container;
-
properties = new POIXMLProperties(container);
}
- public static void main(String[] args) throws Exception {
- if (args.length < 1) {
- System.err.println("Use:");
- System.err.println(" XSSFEventBasedExcelExtractor <filename.xlsx>");
- System.exit(1);
- }
- POIXMLTextExtractor extractor =
- new XSSFEventBasedExcelExtractor(args[0]);
- System.out.println(extractor.getText());
- extractor.close();
- }
-
/**
* Should sheet names be included? Default is true
*/
@@ -319,12 +308,23 @@ public class XSSFEventBasedExcelExtracto
}
@Override
- public void close() throws IOException {
- if (container != null) {
- container.close();
- container = null;
- }
- super.close();
+ public POIXMLDocument getDocument() {
+ return null;
+ }
+
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
+
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
+
+ @Override
+ public OPCPackage getFilesystem() {
+ return container;
}
protected class SheetTextExtractor implements SheetContentsHandler {
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java Thu Aug 13 21:08:24 2020
@@ -20,8 +20,8 @@ import java.io.IOException;
import java.util.Iterator;
import java.util.Locale;
-import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
@@ -44,8 +44,8 @@ import org.apache.xmlbeans.XmlException;
/**
* Helper class to extract text from an OOXML Excel file
*/
-public class XSSFExcelExtractor extends POIXMLTextExtractor
- implements org.apache.poi.ss.extractor.ExcelExtractor {
+public class XSSFExcelExtractor
+ implements POIXMLTextExtractor, org.apache.poi.ss.extractor.ExcelExtractor {
public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK,
XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK,
@@ -53,34 +53,21 @@ public class XSSFExcelExtractor extends
};
private Locale locale;
- private XSSFWorkbook workbook;
+ private final XSSFWorkbook workbook;
private boolean includeSheetNames = true;
private boolean formulasNotResults;
private boolean includeCellComments;
private boolean includeHeadersFooters = true;
private boolean includeTextBoxes = true;
+ private boolean doCloseFilesystem = true;
public XSSFExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
this(new XSSFWorkbook(container));
}
public XSSFExcelExtractor(XSSFWorkbook workbook) {
- super(workbook);
this.workbook = workbook;
}
- public static void main(String[] args) throws Exception {
- if(args.length < 1) {
- System.err.println("Use:");
- System.err.println(" XSSFExcelExtractor <filename.xlsx>");
- System.exit(1);
- }
-
- try (OPCPackage pkg = OPCPackage.create(args[0]);
- POIXMLTextExtractor extractor = new XSSFExcelExtractor(pkg)) {
- System.out.println(extractor.getText());
- }
- }
-
/**
* Should sheet names be included? Default is true
*/
@@ -194,7 +181,7 @@ public class XSSFExcelExtractor extends
}
text.append("\n");
}
-
+
// add textboxes
if (includeTextBoxes){
XSSFDrawing drawing = sheet.getDrawingPatriarch();
@@ -262,4 +249,24 @@ public class XSSFExcelExtractor extends
private String extractHeaderFooter(HeaderFooter hf) {
return ExcelExtractor._extractHeaderFooter(hf);
}
+
+ @Override
+ public XSSFWorkbook getDocument() {
+ return workbook;
+ }
+
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
+
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
+
+ @Override
+ public XSSFWorkbook getFilesystem() {
+ return workbook;
+ }
}
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java Thu Aug 13 21:08:24 2020
@@ -19,9 +19,7 @@ package org.apache.poi.xwpf.extractor;
import java.io.IOException;
import java.util.List;
-import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
@@ -39,46 +37,31 @@ import org.apache.poi.xwpf.usermodel.XWP
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
-import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
/**
* Helper class to extract text from an OOXML Word file
*/
-public class XWPFWordExtractor extends POIXMLTextExtractor {
+public class XWPFWordExtractor implements POIXMLTextExtractor {
public static final XWPFRelation[] SUPPORTED_TYPES = {
XWPFRelation.DOCUMENT, XWPFRelation.TEMPLATE,
XWPFRelation.MACRO_DOCUMENT,
XWPFRelation.MACRO_TEMPLATE_DOCUMENT
};
- private XWPFDocument document;
+ private final XWPFDocument document;
private boolean fetchHyperlinks;
private boolean concatenatePhoneticRuns = true;
+ private boolean doCloseFilesystem = true;
- public XWPFWordExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
+ public XWPFWordExtractor(OPCPackage container) throws IOException {
this(new XWPFDocument(container));
}
public XWPFWordExtractor(XWPFDocument document) {
- super(document);
this.document = document;
}
- public static void main(String[] args) throws Exception {
- if (args.length < 1) {
- System.err.println("Use:");
- System.err.println(" XWPFWordExtractor <filename.docx>");
- System.exit(1);
- }
- POIXMLTextExtractor extractor =
- new XWPFWordExtractor(POIXMLDocument.openPackage(
- args[0]
- ));
- System.out.println(extractor.getText());
- extractor.close();
- }
-
/**
* Should we also fetch the hyperlinks, when fetching
* the text content? Default is to only output the
@@ -217,4 +200,24 @@ public class XWPFWordExtractor extends P
text.append(hfPolicy.getDefaultHeader().getText());
}
}
+
+ @Override
+ public XWPFDocument getDocument() {
+ return document;
+ }
+
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
+
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
+
+ @Override
+ public XWPFDocument getFilesystem() {
+ return document;
+ }
}
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/ooxml/TestExtractorFactory.java Thu Aug 13 21:08:24 2020
@@ -31,23 +31,25 @@ import java.util.Locale;
import org.apache.poi.POIDataSamples;
import org.apache.poi.UnsupportedFileFormatException;
+import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.extractor.POIOLE2TextExtractor;
import org.apache.poi.extractor.POITextExtractor;
import org.apache.poi.hssf.HSSFTestDataSamples;
-import org.apache.poi.hssf.OldExcelFormatException;
import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
-import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
+import org.apache.poi.ooxml.extractor.POIXMLExtractorFactory;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.poifs.filesystem.NotOLE2FileException;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.xmlbeans.XmlException;
+import org.junit.Rule;
import org.junit.Test;
+import org.junit.rules.ExpectedException;
/**
* Test that the extractor factory plays nicely
@@ -89,6 +91,8 @@ public class TestExtractorFactory {
private static POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
private static File pub = getFileAndCheck(pubTests, "Simple.pub");
+ private static final POIXMLExtractorFactory xmlFactory = new POIXMLExtractorFactory();
+
private static File getFileAndCheck(POIDataSamples samples, String name) {
File file = samples.getFile(name);
@@ -110,7 +114,7 @@ public class TestExtractorFactory {
"Word 6", doc6, "Word6Extractor", 20,
"Word 95", doc95, "Word6Extractor", 120,
"PowerPoint", ppt, "SlideShowExtractor", 120,
- "PowerPoint - pptx", pptx, "SlideShowExtractor", 120,
+ "PowerPoint - pptx", pptx, "XSLFExtractor", 120,
"Visio", vsd, "VisioTextExtractor", 50,
"Visio - vsdx", vsdx, "XDGFVisioExtractor", 20,
"Publisher", pub, "PublisherTextExtractor", 50,
@@ -125,6 +129,8 @@ public class TestExtractorFactory {
R apply(T t) throws IOException, OpenXML4JException, XmlException;
}
+ @Rule
+ public ExpectedException thrown = ExpectedException.none();
@Test
public void testFile() throws Exception {
@@ -135,12 +141,12 @@ public class TestExtractorFactory {
}
}
- @Test(expected = IllegalArgumentException.class)
+ @Test
public void testFileInvalid() throws Exception {
+ thrown.expectMessage("Can't create extractor - unsupported file type: UNKNOWN");
+ thrown.expect(IOException.class);
// Text
- try (POITextExtractor ignored = ExtractorFactory.createExtractor(txt)) {
- fail("extracting from invalid package");
- }
+ ExtractorFactory.createExtractor(txt);
}
@Test
@@ -148,8 +154,10 @@ public class TestExtractorFactory {
testStream(ExtractorFactory::createExtractor, true);
}
- @Test(expected = IllegalArgumentException.class)
+ @Test
public void testInputStreamInvalid() throws Exception {
+ thrown.expectMessage("Can't create extractor - unsupported file type: UNKNOWN");
+ thrown.expect(IOException.class);
testInvalid(ExtractorFactory::createExtractor);
}
@@ -158,8 +166,10 @@ public class TestExtractorFactory {
testStream((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)), false);
}
- @Test(expected = IOException.class)
+ @Test
public void testPOIFSInvalid() throws Exception {
+ thrown.expectMessage("Invalid header signature; read 0x3D20726F68747541, expected 0xE11AB1A1E011CFD0");
+ thrown.expect(NotOLE2FileException.class);
testInvalid((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)));
}
@@ -195,9 +205,7 @@ public class TestExtractorFactory {
POITextExtractor ignored = poifs.apply(fis)) {
fail("extracting from invalid package");
} catch (IllegalArgumentException e) {
- assertTrue("Had: " + e,
- e.getMessage().contains(FileMagic.UNKNOWN.name()));
-
+ assertTrue("Had: " + e, e.getMessage().contains(FileMagic.UNKNOWN.name()));
throw e;
}
}
@@ -211,7 +219,7 @@ public class TestExtractorFactory {
}
try (final OPCPackage pkg = OPCPackage.open(testFile, PackageAccess.READ);
- final POITextExtractor ext = ExtractorFactory.createExtractor(pkg)) {
+ final POITextExtractor ext = xmlFactory.create(pkg)) {
testExtractor(ext, (String) TEST_SET[i], (String) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
pkg.revert();
}
@@ -222,7 +230,7 @@ public class TestExtractorFactory {
public void testPackageInvalid() throws Exception {
// Text
try (final OPCPackage pkg = OPCPackage.open(txt, PackageAccess.READ);
- final POITextExtractor ignored = ExtractorFactory.createExtractor(pkg)) {
+ final POITextExtractor ignored = xmlFactory.create(pkg)) {
fail("extracting from invalid package");
}
}
@@ -251,61 +259,45 @@ public class TestExtractorFactory {
assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
+ try {
+ // Check we get the right extractors now
+ try (POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))) {
+ assertTrue(extractor instanceof EventBasedExcelExtractor);
+ }
+ try (POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))) {
+ assertTrue(extractor.getText().length() > 200);
+ }
- // Check we get the right extractors now
- POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
- assertTrue(
- extractor
- instanceof EventBasedExcelExtractor
- );
- extractor.close();
- extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
- assertTrue(
- extractor.getText().length() > 200
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
- assertTrue(extractor instanceof XSSFEventBasedExcelExtractor);
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
- assertTrue(
- extractor.getText().length() > 200
- );
- extractor.close();
+ try (POITextExtractor extractor = xmlFactory.create(OPCPackage.open(xlsx.toString(), PackageAccess.READ))) {
+ assertTrue(extractor instanceof XSSFEventBasedExcelExtractor);
+ }
+ try (POITextExtractor extractor = xmlFactory.create(OPCPackage.open(xlsx.toString(), PackageAccess.READ))) {
+ assertTrue(extractor.getText().length() > 200);
+ }
+ } finally {
+ // Put back to normal
+ ExtractorFactory.setThreadPrefersEventExtractors(false);
+ }
- // Put back to normal
- ExtractorFactory.setThreadPrefersEventExtractors(false);
assertFalse(ExtractorFactory.getPreferEventExtractor());
assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
// And back
- extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
- assertTrue(
- extractor
- instanceof ExcelExtractor
- );
- extractor.close();
- extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
- assertTrue(
- extractor.getText().length() > 200
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
- assertTrue(
- extractor
- instanceof XSSFExcelExtractor
- );
- extractor.close();
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
- assertTrue(
- extractor.getText().length() > 200
- );
- extractor.close();
+ try (POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))) {
+ assertTrue(extractor instanceof ExcelExtractor);
+ }
+ try (POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))) {
+ assertTrue(extractor.getText().length() > 200);
+ }
+
+ try (POITextExtractor extractor = xmlFactory.create(OPCPackage.open(xlsx.toString(), PackageAccess.READ))) {
+ assertTrue(extractor instanceof XSSFExcelExtractor);
+ }
+ try (POITextExtractor extractor = xmlFactory.create(OPCPackage.open(xlsx.toString()))) {
+ assertTrue(extractor.getText().length() > 200);
+ }
}
/**
@@ -325,7 +317,7 @@ public class TestExtractorFactory {
};
for (int i=0; i<testObj.length; i+=3) {
- try (final POIOLE2TextExtractor ext = ExtractorFactory.createExtractor((File)testObj[i+1])) {
+ try (final POIOLE2TextExtractor ext = (POIOLE2TextExtractor)ExtractorFactory.createExtractor((File)testObj[i+1])) {
final POITextExtractor[] embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext);
int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX = 0;
@@ -443,13 +435,13 @@ public class TestExtractorFactory {
"spreadsheet/WithChartSheet.xlsx",
"spreadsheet/chart_sheet.xlsx",
};
-
+
@Test
public void testFileLeak() {
- // run a number of files that might fail in order to catch
+ // run a number of files that might fail in order to catch
// leaked file resources when using file-leak-detector while
// running the test
-
+
for(String file : EXPECTED_FAILURES) {
try {
ExtractorFactory.createExtractor(POIDataSamples.getSpreadSheetInstance().getFile(file));
@@ -458,21 +450,22 @@ public class TestExtractorFactory {
}
}
}
-
+
/**
- * #59074 - Excel 95 files should give a helpful message, not just
+ * #59074 - Excel 95 files should give a helpful message, not just
* "No supported documents found in the OLE2 stream"
*/
- @Test(expected = OldExcelFormatException.class)
public void bug59074() throws Exception {
- ExtractorFactory.createExtractor(
- POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
+ try (POITextExtractor extractor = ExtractorFactory.createExtractor(POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"))) {
+ String text = extractor.getText();
+ assertContains(text, "testdoc");
+ }
}
@Test(expected = IllegalStateException.class)
- public void testGetEmbeddedFromXMLExtractor() {
+ public void testGetEmbeddedFromXMLExtractor() throws IOException {
// currently not implemented
- ExtractorFactory.getEmbeddedDocsTextExtractors((POIXMLTextExtractor)null);
+ ExtractorFactory.getEmbeddedDocsTextExtractors(null);
}
// This bug is currently open. This test will fail with "expected error not thrown" when the bug has been fixed.
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java Thu Aug 13 21:08:24 2020
@@ -60,9 +60,9 @@ import org.apache.poi.EncryptedDocumentE
import org.apache.poi.POIDataSamples;
import org.apache.poi.POITestCase;
import org.apache.poi.UnsupportedFileFormatException;
+import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.extractor.POITextExtractor;
import org.apache.poi.ooxml.POIXMLException;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
import org.apache.poi.ooxml.util.DocumentHelper;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
@@ -836,7 +836,7 @@ public final class TestPackage {
@Test
public void testZipEntityExpansionExceedsMemory() throws IOException, OpenXML4JException, XmlException {
- expectedEx.expect(POIXMLException.class);
+ expectedEx.expect(IOException.class);
expectedEx.expectMessage("unable to parse shared strings table");
expectedEx.expectCause(getCauseMatcher(SAXParseException.class, "The parser has encountered more than"));
openXmlBombFile("poc-xmlbomb.xlsx");
@@ -844,7 +844,7 @@ public final class TestPackage {
@Test
public void testZipEntityExpansionExceedsMemory2() throws IOException, OpenXML4JException, XmlException {
- expectedEx.expect(POIXMLException.class);
+ expectedEx.expect(IOException.class);
expectedEx.expectMessage("unable to parse shared strings table");
expectedEx.expectCause(getCauseMatcher(SAXParseException.class, "The parser has encountered more than"));
openXmlBombFile("poc-xmlbomb-empty.xlsx");
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/tests/TestHxxFEncryption.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/tests/TestHxxFEncryption.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/tests/TestHxxFEncryption.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/tests/TestHxxFEncryption.java Thu Aug 13 21:08:24 2020
@@ -35,14 +35,12 @@ import java.util.Collection;
import org.apache.poi.POIDataSamples;
import org.apache.poi.POIDocument;
+import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.extractor.POITextExtractor;
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.crypt.cryptoapi.CryptoAPIEncryptionHeader;
import org.apache.poi.poifs.storage.RawDataUtil;
-import org.apache.xmlbeans.XmlException;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@@ -91,7 +89,7 @@ public class TestHxxFEncryption {
}
@Test
- public void extract() throws IOException, OpenXML4JException, XmlException {
+ public void extract() throws IOException {
File f = sampleDir.getFile(file);
Biff8EncryptionKey.setCurrentUserPassword(password);
try (POITextExtractor te = ExtractorFactory.createExtractor(f)) {
@@ -103,16 +101,16 @@ public class TestHxxFEncryption {
}
@Test
- public void changePassword() throws IOException, OpenXML4JException, XmlException {
+ public void changePassword() throws IOException {
newPassword("test");
}
@Test
- public void removePassword() throws IOException, OpenXML4JException, XmlException {
+ public void removePassword() throws IOException {
newPassword(null);
}
- private void newPassword(String newPass) throws IOException, OpenXML4JException, XmlException {
+ private void newPassword(String newPass) throws IOException {
File f = sampleDir.getFile(file);
Biff8EncryptionKey.setCurrentUserPassword(password);
try (POITextExtractor te1 = ExtractorFactory.createExtractor(f)) {
@@ -133,7 +131,7 @@ public class TestHxxFEncryption {
/** changing the encryption mode and key size in poor man's style - see comments below */
@Test
- public void changeEncryption() throws IOException, OpenXML4JException, XmlException {
+ public void changeEncryption() throws IOException {
File f = sampleDir.getFile(file);
ByteArrayOutputStream bos = new ByteArrayOutputStream();
Biff8EncryptionKey.setCurrentUserPassword(password);
@@ -157,7 +155,7 @@ public class TestHxxFEncryption {
POIDocument doc = (POIDocument) te3.getDocument()) {
// need to cache data (i.e. read all data) before changing the key size
Class<?> clazz = doc.getClass();
- if ("HSLFSlideShowImpl".equals(clazz.getSimpleName())) {
+ if ("HSLFSlideShow".equals(clazz.getSimpleName())) {
try {
clazz.getDeclaredMethod("getPictureData").invoke(doc);
} catch (ReflectiveOperationException e) {
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java Thu Aug 13 21:08:24 2020
@@ -522,7 +522,7 @@ public class TestXSLFBugs {
private String getSlideText(XMLSlideShow ppt, XSLFSlide slide) throws IOException {
try (SlideShowExtractor<XSLFShape,XSLFTextParagraph> extr = new SlideShowExtractor<>(ppt)) {
// do not auto-close the slideshow
- extr.setFilesystem(null);
+ extr.setCloseFilesystem(false);
extr.setSlidesByDefault(true);
extr.setNotesByDefault(false);
extr.setMasterByDefault(false);
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java Thu Aug 13 21:08:24 2020
@@ -29,20 +29,18 @@ import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.POIDataSamples;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
-import org.apache.xmlbeans.XmlException;
import org.junit.Test;
/**
* Tests for XSLFPowerPointExtractor
*/
public class TestXSLFPowerPointExtractor {
- private static POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
+ private static final POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
/**
* Get text out of the simple file
@@ -262,10 +260,11 @@ public class TestXSLFPowerPointExtractor
}
@Test
- public void test45541() throws IOException, OpenXML4JException, XmlException {
+ public void test45541() throws IOException {
// extract text from a powerpoint that has a header in the notes-element
final File headerFile = slTests.getFile("45541_Header.pptx");
- try (final SlideShowExtractor extr = ExtractorFactory.createExtractor(headerFile)) {
+ //noinspection rawtypes
+ try (final SlideShowExtractor extr = (SlideShowExtractor) ExtractorFactory.createExtractor(headerFile)) {
String text = extr.getText();
assertNotNull(text);
assertFalse("Had: " + text, text.contains("testdoc"));
@@ -280,7 +279,8 @@ public class TestXSLFPowerPointExtractor
// extract text from a powerpoint that has a footer in the master-slide
final File footerFile = slTests.getFile("45541_Footer.pptx");
- try (SlideShowExtractor extr = ExtractorFactory.createExtractor(footerFile)) {
+ //noinspection rawtypes
+ try (SlideShowExtractor extr = (SlideShowExtractor)ExtractorFactory.createExtractor(footerFile)) {
String text = extr.getText();
assertNotContained(text, "testdoc");
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java Thu Aug 13 21:08:24 2020
@@ -16,7 +16,7 @@
==================================================================== */
package org.apache.poi.xssf.extractor;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
+import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.junit.After;
@@ -27,7 +27,7 @@ public class TestXSSFEventBasedExcelExtr
ExtractorFactory.setAllThreadsPreferEventExtractors(true);
return (XSSFEventBasedExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName));
}
-
+
@After
public void tearDown() {
// reset setting to not affect other tests
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java Thu Aug 13 21:08:24 2020
@@ -17,8 +17,8 @@
package org.apache.poi.xssf.extractor;
+import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.HSSFTestDataSamples;
-import org.apache.poi.ooxml.extractor.ExtractorFactory;
import org.junit.After;
/**
Added: poi/trunk/src/resources/main/META-INF/services/org.apache.poi.extractor.ExtractorProvider
URL: http://svn.apache.org/viewvc/poi/trunk/src/resources/main/META-INF/services/org.apache.poi.extractor.ExtractorProvider?rev=1880839&view=auto
==============================================================================
--- poi/trunk/src/resources/main/META-INF/services/org.apache.poi.extractor.ExtractorProvider (added)
+++ poi/trunk/src/resources/main/META-INF/services/org.apache.poi.extractor.ExtractorProvider Thu Aug 13 21:08:24 2020
@@ -0,0 +1,18 @@
+# ====================================================================
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+org.apache.poi.extractor.MainExtractorFactory
\ No newline at end of file
Copied: poi/trunk/src/resources/ooxml/META-INF/services/org.apache.poi.extractor.ExtractorProvider (from r1880689, poi/trunk/src/resources/main/META-INF/services/org.apache.poi.ss.usermodel.WorkbookProvider)
URL: http://svn.apache.org/viewvc/poi/trunk/src/resources/ooxml/META-INF/services/org.apache.poi.extractor.ExtractorProvider?p2=poi/trunk/src/resources/ooxml/META-INF/services/org.apache.poi.extractor.ExtractorProvider&p1=poi/trunk/src/resources/main/META-INF/services/org.apache.poi.ss.usermodel.WorkbookProvider&r1=1880689&r2=1880839&rev=1880839&view=diff
==============================================================================
--- poi/trunk/src/resources/main/META-INF/services/org.apache.poi.ss.usermodel.WorkbookProvider (original)
+++ poi/trunk/src/resources/ooxml/META-INF/services/org.apache.poi.extractor.ExtractorProvider Thu Aug 13 21:08:24 2020
@@ -15,4 +15,4 @@
# limitations under the License.
# ====================================================================
-org.apache.poi.hssf.usermodel.HSSFWorkbookFactory
\ No newline at end of file
+org.apache.poi.ooxml.extractor.POIXMLExtractorFactory
\ No newline at end of file
Added: poi/trunk/src/resources/scratchpad/META-INF/services/org.apache.poi.extractor.ExtractorProvider
URL: http://svn.apache.org/viewvc/poi/trunk/src/resources/scratchpad/META-INF/services/org.apache.poi.extractor.ExtractorProvider?rev=1880839&view=auto
==============================================================================
--- poi/trunk/src/resources/scratchpad/META-INF/services/org.apache.poi.extractor.ExtractorProvider (added)
+++ poi/trunk/src/resources/scratchpad/META-INF/services/org.apache.poi.extractor.ExtractorProvider Thu Aug 13 21:08:24 2020
@@ -0,0 +1,18 @@
+# ====================================================================
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ====================================================================
+
+org.apache.poi.extractor.ole2.OLE2ScratchpadExtractorFactory
\ No newline at end of file
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/extractor/ole2/OLE2ScratchpadExtractorFactory.java Thu Aug 13 21:08:24 2020
@@ -17,44 +17,66 @@
package org.apache.poi.extractor.ole2;
import java.io.ByteArrayInputStream;
+import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
-import java.util.Iterator;
import java.util.List;
+import java.util.stream.StreamSupport;
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.extractor.ExtractorProvider;
import org.apache.poi.extractor.POIOLE2TextExtractor;
import org.apache.poi.extractor.POITextExtractor;
-import org.apache.poi.extractor.OLE2ExtractorFactory;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
+import org.apache.poi.hslf.usermodel.HSLFShape;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
+import org.apache.poi.hslf.usermodel.HSLFTextParagraph;
import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.extractor.OutlookTextExtractor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.sl.usermodel.SlideShowFactory;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/**
- * Scratchpad-specific logic for {@link OLE2ExtractorFactory} and
+ * Scratchpad-specific logic for {@link ExtractorFactory} and
* {@link org.apache.poi.extractor.ExtractorFactory}, which permit the other two to run with
* no Scratchpad jar (though without functionality!)
* <p>Note - should not be used standalone, always use via the other
* two classes</p>
*/
@SuppressWarnings("WeakerAccess")
-public class OLE2ScratchpadExtractorFactory {
+public class OLE2ScratchpadExtractorFactory implements ExtractorProvider {
private static final POILogger logger = POILogFactory.getLogger(OLE2ScratchpadExtractorFactory.class);
+ @Override
+ public boolean accepts(FileMagic fm) {
+ return FileMagic.OLE2 == fm;
+ }
+
+ @Override
+ public POITextExtractor create(File file, String password) throws IOException {
+ return create(new POIFSFileSystem(file, true).getRoot(), password);
+ }
+
+ @Override
+ public POITextExtractor create(InputStream inputStream, String password) throws IOException {
+ return create(new POIFSFileSystem(inputStream).getRoot(), password);
+ }
+
/**
* Look for certain entries in the stream, to figure
* out what format is desired
@@ -66,48 +88,54 @@ public class OLE2ScratchpadExtractorFact
*
* @throws IOException when the format specific extraction fails because of invalid entries
*/
- public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException {
- if (poifsDir.hasEntry("WordDocument")) {
- // Old or new style word document?
- try {
- return new WordExtractor(poifsDir);
- } catch (OldWordFileFormatException e) {
- return new Word6Extractor(poifsDir);
+ public POITextExtractor create(DirectoryNode poifsDir, String password) throws IOException {
+ final String oldPW = Biff8EncryptionKey.getCurrentUserPassword();
+ try {
+ Biff8EncryptionKey.setCurrentUserPassword(password);
+ if (poifsDir.hasEntry("WordDocument")) {
+ // Old or new style word document?
+ try {
+ return new WordExtractor(poifsDir);
+ } catch (OldWordFileFormatException e) {
+ return new Word6Extractor(poifsDir);
+ }
}
- }
- if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
- return new SlideShowExtractor(SlideShowFactory.create(poifsDir));
- }
+ if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
+ return new SlideShowExtractor<HSLFShape, HSLFTextParagraph>(SlideShowFactory.create(poifsDir));
+ }
- if (poifsDir.hasEntry("VisioDocument")) {
- return new VisioTextExtractor(poifsDir);
- }
+ if (poifsDir.hasEntry("VisioDocument")) {
+ return new VisioTextExtractor(poifsDir);
+ }
- if (poifsDir.hasEntry("Quill")) {
- return new PublisherTextExtractor(poifsDir);
- }
+ if (poifsDir.hasEntry("Quill")) {
+ return new PublisherTextExtractor(poifsDir);
+ }
- final String[] outlookEntryNames = new String[] {
- // message bodies, saved as plain text (PtypString)
- // The first short (0x1000, 0x0047, 0x0037) refer to the Property ID (see [MS-OXPROPS].pdf)
- // the second short (0x001e, 0x001f, 0x0102) refer to the type of data stored in this entry
- // https://msdn.microsoft.com/en-us/library/cc433490(v=exchg.80).aspx
- // @see org.apache.poi.hsmf.Types.MAPIType
- "__substg1.0_1000001E", //PidTagBody ASCII
- "__substg1.0_1000001F", //PidTagBody Unicode
- "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII
- "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode
- "__substg1.0_0037001E", //PidTagSubject ASCII
- "__substg1.0_0037001F", //PidTagSubject Unicode
- };
- for (String entryName : outlookEntryNames) {
- if (poifsDir.hasEntry(entryName)) {
- return new OutlookTextExtractor(poifsDir);
+ final String[] outlookEntryNames = new String[]{
+ // message bodies, saved as plain text (PtypString)
+ // The first short (0x1000, 0x0047, 0x0037) refer to the Property ID (see [MS-OXPROPS].pdf)
+ // the second short (0x001e, 0x001f, 0x0102) refer to the type of data stored in this entry
+ // https://msdn.microsoft.com/en-us/library/cc433490(v=exchg.80).aspx
+ // @see org.apache.poi.hsmf.Types.MAPIType
+ "__substg1.0_1000001E", //PidTagBody ASCII
+ "__substg1.0_1000001F", //PidTagBody Unicode
+ "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII
+ "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode
+ "__substg1.0_0037001E", //PidTagSubject ASCII
+ "__substg1.0_0037001F", //PidTagSubject Unicode
+ };
+ for (String entryName : outlookEntryNames) {
+ if (poifsDir.hasEntry(entryName)) {
+ return new OutlookTextExtractor(poifsDir);
+ }
}
+ } finally {
+ Biff8EncryptionKey.setCurrentUserPassword(oldPW);
}
- throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
+ return null;
}
/**
@@ -120,10 +148,9 @@ public class OLE2ScratchpadExtractorFact
* @param ext the extractor holding the directory to start parsing
* @param dirs a list to be filled with directory references holding embedded
* @param nonPOIFS a list to be filled with streams which aren't based on POIFS entries
- *
- * @throws IOException when the format specific extraction fails because of invalid entires
*/
- public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
+ @Override
+ public void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) {
// Find all the embedded directories
DirectoryEntry root = ext.getRoot();
if (root == null) {
@@ -132,25 +159,16 @@ public class OLE2ScratchpadExtractorFact
if (ext instanceof ExcelExtractor) {
// These are in MBD... under the root
- Iterator<Entry> it = root.getEntries();
- while (it.hasNext()) {
- Entry entry = it.next();
- if (entry.getName().startsWith("MBD")) {
- dirs.add(entry);
- }
- }
+ StreamSupport.stream(root.spliterator(), false)
+ .filter(entry -> entry.getName().startsWith("MBD"))
+ .forEach(dirs::add);
} else if (ext instanceof WordExtractor) {
// These are in ObjectPool -> _... under the root
try {
- DirectoryEntry op = (DirectoryEntry)
- root.getEntry("ObjectPool");
- Iterator<Entry> it = op.getEntries();
- while(it.hasNext()) {
- Entry entry = it.next();
- if(entry.getName().startsWith("_")) {
- dirs.add(entry);
- }
- }
+ DirectoryEntry op = (DirectoryEntry) root.getEntry("ObjectPool");
+ StreamSupport.stream(op.spliterator(), false)
+ .filter(entry -> entry.getName().startsWith("_"))
+ .forEach(dirs::add);
} catch(FileNotFoundException e) {
logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage());
// ignored here
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java Thu Aug 13 21:08:24 2020
@@ -17,7 +17,6 @@
package org.apache.poi.hdgf.extractor;
-import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
@@ -38,11 +37,11 @@ import org.apache.poi.poifs.filesystem.P
* Can operate on the command line (outputs to stdout), or
* can return the text for you (example: for use with Lucene).
*/
-public final class VisioTextExtractor extends POIOLE2TextExtractor {
+public final class VisioTextExtractor implements POIOLE2TextExtractor {
private HDGFDiagram hdgf;
+ private boolean doCloseFilesystem = true;
public VisioTextExtractor(HDGFDiagram hdgf) {
- super(hdgf);
this.hdgf = hdgf;
}
public VisioTextExtractor(POIFSFileSystem fs) throws IOException {
@@ -91,9 +90,7 @@ public final class VisioTextExtractor ex
// Capture the text, as long as it isn't
// simply an empty string
String str = cmd.getValue().toString();
- if(str.isEmpty() || "\n".equals(str)) {
- // Ignore empty strings
- } else {
+ if (!(str.isEmpty() || "\n".equals(str))) {
text.add( str );
}
}
@@ -121,21 +118,23 @@ public final class VisioTextExtractor ex
return text.toString();
}
- public static void main(String[] args) throws Exception {
- if(args.length == 0) {
- System.err.println("Use:");
- System.err.println(" VisioTextExtractor <file.vsd>");
- System.exit(1);
- }
+ @Override
+ public HDGFDiagram getDocument() {
+ return hdgf;
+ }
- try (FileInputStream fis = new FileInputStream(args[0])) {
- VisioTextExtractor extractor =
- new VisioTextExtractor(fis);
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
- // Print not PrintLn as already has \n added to it
- System.out.print(extractor.getText());
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
- extractor.close();
- }
+ @Override
+ public HDGFDiagram getFilesystem() {
+ return hdgf;
}
}
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java Thu Aug 13 21:08:24 2020
@@ -17,35 +17,37 @@
package org.apache.poi.hpbf.extractor;
-import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.extractor.POIOLE2TextExtractor;
import org.apache.poi.hpbf.HPBFDocument;
import org.apache.poi.hpbf.model.qcbits.QCBit;
-import org.apache.poi.hpbf.model.qcbits.QCTextBit;
import org.apache.poi.hpbf.model.qcbits.QCPLCBit.Type12;
+import org.apache.poi.hpbf.model.qcbits.QCTextBit;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
* Extract text from HPBF Publisher files
*/
-public final class PublisherTextExtractor extends POIOLE2TextExtractor {
- private HPBFDocument doc;
+public final class PublisherTextExtractor implements POIOLE2TextExtractor {
+ private final HPBFDocument doc;
private boolean hyperlinksByDefault;
+ private boolean doCloseFilesystem = true;
public PublisherTextExtractor(HPBFDocument doc) {
- super(doc);
this.doc = doc;
}
+
public PublisherTextExtractor(DirectoryNode dir) throws IOException {
this(new HPBFDocument(dir));
}
+
public PublisherTextExtractor(POIFSFileSystem fs) throws IOException {
this(new HPBFDocument(fs));
}
+
public PublisherTextExtractor(InputStream is) throws IOException {
this(new POIFSFileSystem(is));
}
@@ -66,7 +68,7 @@ public final class PublisherTextExtracto
// Get the text from the Quill Contents
QCBit[] bits = doc.getQuillContents().getBits();
for (QCBit bit1 : bits) {
- if (bit1 != null && bit1 instanceof QCTextBit) {
+ if (bit1 instanceof QCTextBit) {
QCTextBit t = (QCTextBit) bit1;
text.append(t.getText().replace('\r', '\n'));
}
@@ -79,7 +81,7 @@ public final class PublisherTextExtracto
// how to tie that together.
if(hyperlinksByDefault) {
for (QCBit bit : bits) {
- if (bit != null && bit instanceof Type12) {
+ if (bit instanceof Type12) {
Type12 hyperlinks = (Type12) bit;
for (int j = 0; j < hyperlinks.getNumberOfHyperlinks(); j++) {
text.append("<");
@@ -96,19 +98,23 @@ public final class PublisherTextExtracto
return text.toString();
}
+ @Override
+ public HPBFDocument getDocument() {
+ return doc;
+ }
+
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
- public static void main(String[] args) throws Exception {
- if(args.length == 0) {
- System.err.println("Use:");
- System.err.println(" PublisherTextExtractor <file.pub>");
- }
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
- for (String arg : args) {
- try (FileInputStream fis = new FileInputStream(arg)) {
- PublisherTextExtractor te = new PublisherTextExtractor(fis);
- System.out.println(te.getText());
- te.close();
- }
- }
+ @Override
+ public HPBFDocument getFilesystem() {
+ return doc;
}
}
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java Thu Aug 13 21:08:24 2020
@@ -33,6 +33,7 @@ import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
+import org.apache.poi.POIDocument;
import org.apache.poi.common.usermodel.GenericRecord;
import org.apache.poi.common.usermodel.fonts.FontInfo;
import org.apache.poi.ddf.EscherBSERecord;
@@ -40,6 +41,9 @@ import org.apache.poi.ddf.EscherContaine
import org.apache.poi.ddf.EscherOptRecord;
import org.apache.poi.hpsf.ClassID;
import org.apache.poi.hpsf.ClassIDPredefined;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.PropertySet;
+import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
import org.apache.poi.hslf.exceptions.HSLFException;
@@ -47,6 +51,7 @@ import org.apache.poi.hslf.model.Headers
import org.apache.poi.hslf.model.MovieShape;
import org.apache.poi.hslf.record.*;
import org.apache.poi.hslf.record.SlideListWithText.SlideAtomsSet;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Ole10Native;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@@ -66,7 +71,7 @@ import org.apache.poi.util.Units;
* TODO: - figure out how to match notes to their correct sheet (will involve
* understanding DocSlideList and DocNotesList) - handle Slide creation cleaner
*/
-public final class HSLFSlideShow implements SlideShow<HSLFShape,HSLFTextParagraph>, Closeable, GenericRecord {
+public final class HSLFSlideShow extends POIDocument implements SlideShow<HSLFShape,HSLFTextParagraph>, Closeable, GenericRecord {
//arbitrarily selected; may need to increase
private static final int MAX_RECORD_LENGTH = 10_000_000;
@@ -111,6 +116,8 @@ public final class HSLFSlideShow impleme
* @param hslfSlideShow the HSLFSlideShow to base on
*/
public HSLFSlideShow(HSLFSlideShowImpl hslfSlideShow) {
+ super(hslfSlideShow.getDirectory());
+
loadSavePhase.set(LoadSavePhase.INIT);
// Get useful things from our base slideshow
@@ -1080,7 +1087,7 @@ public final class HSLFSlideShow impleme
public HPSFPropertiesExtractor getMetadataTextExtractor() {
return new HPSFPropertiesExtractor(getSlideShowImpl());
}
-
+
int addToObjListAtom(RecordContainer exObj) {
ExObjList lst = getDocumentRecord().getExObjList(true);
ExObjListAtom objAtom = lst.getExObjListAtom();
@@ -1097,7 +1104,7 @@ public final class HSLFSlideShow impleme
Map<String,ClassID> olemap = new HashMap<>();
olemap.put(POWERPOINT_DOCUMENT, ClassIDPredefined.POWERPOINT_V8.getClassID());
// as per BIFF8 spec
- olemap.put("Workbook", ClassIDPredefined.EXCEL_V8.getClassID());
+ olemap.put("Workbook", ClassIDPredefined.EXCEL_V8.getClassID());
// Typically from third party programs
olemap.put("WORKBOOK", ClassIDPredefined.EXCEL_V8.getClassID());
// Typically odd Crystal Reports exports
@@ -1179,4 +1186,94 @@ public final class HSLFSlideShow impleme
public List<? extends GenericRecord> getGenericChildren() {
return Arrays.asList(_hslfSlideShow.getRecords());
}
+
+ @Override
+ public void write() throws IOException {
+ getSlideShowImpl().write();
+ }
+
+ @Override
+ public void write(File newFile) throws IOException {
+ getSlideShowImpl().write(newFile);
+ }
+
+ @Override
+ public DocumentSummaryInformation getDocumentSummaryInformation() {
+ return getSlideShowImpl().getDocumentSummaryInformation();
+ }
+
+ @Override
+ public SummaryInformation getSummaryInformation() {
+ return getSlideShowImpl().getSummaryInformation();
+ }
+
+ @Override
+ public void createInformationProperties() {
+ getSlideShowImpl().createInformationProperties();
+ }
+
+ @Override
+ public void readProperties() {
+ getSlideShowImpl().readProperties();
+ }
+
+ @Override
+ protected PropertySet getPropertySet(String setName) throws IOException {
+ return getSlideShowImpl().getPropertySetImpl(setName);
+ }
+
+ @Override
+ protected PropertySet getPropertySet(String setName, EncryptionInfo encryptionInfo) throws IOException {
+ return getSlideShowImpl().getPropertySetImpl(setName, encryptionInfo);
+ }
+
+ @Override
+ protected void writeProperties() throws IOException {
+ getSlideShowImpl().writePropertiesImpl();
+ }
+
+ @Override
+ public void writeProperties(POIFSFileSystem outFS) throws IOException {
+ getSlideShowImpl().writeProperties(outFS);
+ }
+
+ @Override
+ protected void writeProperties(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
+ getSlideShowImpl().writePropertiesImpl(outFS, writtenEntries);
+ }
+
+ @Override
+ protected void validateInPlaceWritePossible() throws IllegalStateException {
+ getSlideShowImpl().validateInPlaceWritePossibleImpl();
+ }
+
+ @Override
+ public DirectoryNode getDirectory() {
+ return getSlideShowImpl().getDirectory();
+ }
+
+ @Override
+ protected void clearDirectory() {
+ getSlideShowImpl().clearDirectoryImpl();
+ }
+
+ @Override
+ protected boolean initDirectory() {
+ return getSlideShowImpl().initDirectoryImpl();
+ }
+
+ @Override
+ protected void replaceDirectory(DirectoryNode newDirectory) {
+ getSlideShowImpl().replaceDirectoryImpl(newDirectory);
+ }
+
+ @Override
+ protected String getEncryptedPropertyStreamName() {
+ return getSlideShowImpl().getEncryptedPropertyStreamName();
+ }
+
+ @Override
+ public EncryptionInfo getEncryptionInfo() throws IOException {
+ return getSlideShowImpl().getEncryptionInfo();
+ }
}
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java Thu Aug 13 21:08:24 2020
@@ -36,6 +36,7 @@ import java.util.NavigableMap;
import java.util.TreeMap;
import org.apache.poi.POIDocument;
+import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hslf.exceptions.CorruptPowerPointFileException;
import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.hslf.exceptions.OldPowerPointFormatException;
@@ -714,8 +715,6 @@ public final class HSLFSlideShowImpl ext
}
-
-
/* ******************* adding methods follow ********************* */
/**
@@ -850,6 +849,38 @@ public final class HSLFSlideShowImpl ext
return "EncryptedSummary";
}
+ void writePropertiesImpl() throws IOException {
+ super.writeProperties();
+ }
+
+ PropertySet getPropertySetImpl(String setName) throws IOException {
+ return super.getPropertySet(setName);
+ }
+
+ PropertySet getPropertySetImpl(String setName, EncryptionInfo encryptionInfo) throws IOException {
+ return super.getPropertySet(setName, encryptionInfo);
+ }
+
+ void writePropertiesImpl(POIFSFileSystem outFS, List<String> writtenEntries) throws IOException {
+ super.writeProperties(outFS, writtenEntries);
+ }
+
+ void validateInPlaceWritePossibleImpl() throws IllegalStateException {
+ super.validateInPlaceWritePossible();
+ }
+
+ void clearDirectoryImpl() {
+ super.clearDirectory();
+ }
+
+ boolean initDirectoryImpl() {
+ return super.initDirectory();
+ }
+
+ void replaceDirectoryImpl(DirectoryNode newDirectory) {
+ super.replaceDirectory(newDirectory);
+ }
+
private static class BufAccessBAOS extends ByteArrayOutputStream {
public byte[] getBuf() {
return buf;
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtractor.java Thu Aug 13 21:08:24 2020
@@ -42,9 +42,12 @@ import org.apache.poi.util.LocaleUtil;
*
* @since 4.1.2
*/
-public class OutlookTextExtractor extends POIOLE2TextExtractor {
+public class OutlookTextExtractor implements POIOLE2TextExtractor {
+ private final MAPIMessage msg;
+ private boolean doCloseFilesystem = true;
+
public OutlookTextExtractor(MAPIMessage msg) {
- super(msg);
+ this.msg = msg;
}
public OutlookTextExtractor(DirectoryNode poifsDir) throws IOException {
@@ -76,14 +79,13 @@ public class OutlookTextExtractor extend
* Returns the underlying MAPI message
*/
public MAPIMessage getMAPIMessage() {
- return (MAPIMessage) document;
+ return msg;
}
/**
* Outputs something a little like a RFC822 email
*/
public String getText() {
- MAPIMessage msg = (MAPIMessage) document;
StringBuilder s = new StringBuilder();
// See if we can get a suitable encoding for any
@@ -201,4 +203,24 @@ public class OutlookTextExtractor extend
}
s.append("\n");
}
+
+ @Override
+ public MAPIMessage getDocument() {
+ return msg;
+ }
+
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
+
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
+
+ @Override
+ public MAPIMessage getFilesystem() {
+ return msg;
+ }
}
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/Word6Extractor.java Thu Aug 13 21:08:24 2020
@@ -31,13 +31,14 @@ import org.apache.poi.poifs.filesystem.P
* Class to extract the text from old (Word 6 / Word 95) Word Documents.
*
* This should only be used on the older files, for most uses you
- * should call {@link WordExtractor} which deals properly
+ * should call {@link WordExtractor} which deals properly
* with HWPF.
*
* @author Nick Burch
*/
-public final class Word6Extractor extends POIOLE2TextExtractor {
+public final class Word6Extractor implements POIOLE2TextExtractor {
private HWPFOldDocument doc;
+ private boolean doCloseFilesystem = true;
/**
* Create a new Word Extractor
@@ -49,12 +50,11 @@ public final class Word6Extractor extend
/**
* Create a new Word Extractor
- *
+ *
* @param fs
* POIFSFileSystem containing the word file
*/
- public Word6Extractor( POIFSFileSystem fs ) throws IOException
- {
+ public Word6Extractor( POIFSFileSystem fs ) throws IOException {
this( fs.getRoot() );
}
@@ -62,14 +62,11 @@ public final class Word6Extractor extend
* @deprecated Use {@link #Word6Extractor(DirectoryNode)} instead
*/
@Deprecated
- public Word6Extractor( DirectoryNode dir, POIFSFileSystem fs )
- throws IOException
- {
+ public Word6Extractor( DirectoryNode dir, POIFSFileSystem fs ) throws IOException {
this( dir );
}
- public Word6Extractor( DirectoryNode dir ) throws IOException
- {
+ public Word6Extractor( DirectoryNode dir ) throws IOException {
this( new HWPFOldDocument( dir ) );
}
@@ -78,7 +75,6 @@ public final class Word6Extractor extend
* @param doc The HWPFOldDocument to extract from
*/
public Word6Extractor(HWPFOldDocument doc) {
- super(doc);
this.doc = doc;
}
@@ -101,7 +97,7 @@ public final class Word6Extractor extend
ret = new String[doc.getTextTable().getTextPieces().size()];
for(int i=0; i<ret.length; i++) {
ret[i] = doc.getTextTable().getTextPieces().get(i).getStringBuilder().toString();
-
+
// Fix the line endings
ret[i] = ret[i].replaceAll("\r", "\ufffe");
ret[i] = ret[i].replaceAll("\ufffe","\r\n");
@@ -111,25 +107,40 @@ public final class Word6Extractor extend
return ret;
}
- public String getText()
- {
- try
- {
+ public String getText() {
+ try {
WordToTextConverter wordToTextConverter = new WordToTextConverter();
wordToTextConverter.processDocument( doc );
return wordToTextConverter.getText();
- }
- catch ( Exception exc )
- {
+ } catch ( Exception exc ) {
// fall-back
StringBuilder text = new StringBuilder();
- for ( String t : getParagraphText() )
- {
+ for ( String t : getParagraphText() ) {
text.append( t );
}
return text.toString();
}
}
+
+ @Override
+ public HWPFOldDocument getDocument() {
+ return doc;
+ }
+
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
+
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
+
+ @Override
+ public HWPFOldDocument getFilesystem() {
+ return doc;
+ }
}
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java Thu Aug 13 21:08:24 2020
@@ -17,7 +17,6 @@
package org.apache.poi.hwpf.extractor;
-import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -39,8 +38,9 @@ import org.apache.poi.poifs.filesystem.P
*
* @author Nick Burch
*/
-public final class WordExtractor extends POIOLE2TextExtractor {
- private HWPFDocument doc;
+public final class WordExtractor implements POIOLE2TextExtractor {
+ private final HWPFDocument doc;
+ private boolean doCloseFilesystem = true;
/**
* Create a new Word Extractor
@@ -73,30 +73,10 @@ public final class WordExtractor extends
* The HWPFDocument to extract from
*/
public WordExtractor( HWPFDocument doc ) {
- super( doc );
this.doc = doc;
}
/**
- * Command line extractor, so people will stop moaning that they can't just
- * run this.
- */
- public static void main( String[] args ) throws IOException {
- if ( args.length == 0 ) {
- System.err.println( "Use:" );
- System.err
- .println( " java org.apache.poi.hwpf.extractor.WordExtractor <filename>" );
- System.exit( 1 );
- }
-
- // Process the first argument as a file
- InputStream fin = new FileInputStream( args[0] );
- try (WordExtractor extractor = new WordExtractor(fin)) {
- System.out.println(extractor.getText());
- }
- }
-
- /**
* Get the text from the word file, as an array with one String per
* paragraph
*/
@@ -142,7 +122,7 @@ public final class WordExtractor extends
return getParagraphText( r );
}
- protected static String[] getParagraphText( Range r ) {
+ static String[] getParagraphText( Range r ) {
String[] ret;
ret = new String[r.numParagraphs()];
for ( int i = 0; i < ret.length; i++ ) {
@@ -287,8 +267,27 @@ public final class WordExtractor extends
/**
* Removes any fields (eg macros, page markers etc) from the string.
*/
- public static String stripFields( String text )
- {
+ public static String stripFields( String text ) {
return Range.stripFields( text );
}
+
+ @Override
+ public HWPFDocument getDocument() {
+ return doc;
+ }
+
+ @Override
+ public void setCloseFilesystem(boolean doCloseFilesystem) {
+ this.doCloseFilesystem = doCloseFilesystem;
+ }
+
+ @Override
+ public boolean isCloseFilesystem() {
+ return doCloseFilesystem;
+ }
+
+ @Override
+ public HWPFDocument getFilesystem() {
+ return doc;
+ }
}
Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java?rev=1880839&r1=1880838&r2=1880839&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java Thu Aug 13 21:08:24 2020
@@ -19,12 +19,9 @@ package org.apache.poi.hdgf.extractor;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.io.PrintStream;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hdgf.HDGFDiagram;
@@ -32,7 +29,7 @@ import org.apache.poi.poifs.filesystem.P
import org.junit.Test;
public final class TestVisioExtractor {
- private static POIDataSamples _dgTests = POIDataSamples.getDiagramInstance();
+ private static final POIDataSamples _dgTests = POIDataSamples.getDiagramInstance();
private final String defFilename = "Test_Visio-Some_Random_Text.vsd";
private final int defTextChunks = 5;
@@ -63,7 +60,7 @@ public final class TestVisioExtractor {
is3.close();
HDGFDiagram hdgf3 = new HDGFDiagram(poifs3);
-
+
VisioTextExtractor extractor3 = new VisioTextExtractor(hdgf3);
assertNotNull(extractor3);
assertNotNull(extractor3.getAllText());
@@ -97,7 +94,7 @@ public final class TestVisioExtractor {
@Test
public void testProblemFiles() throws Exception {
String[] files = {
- "44594.vsd", "44594-2.vsd",
+ "44594.vsd", "44594-2.vsd",
"ShortChunk1.vsd", "ShortChunk2.vsd", "ShortChunk3.vsd",
"NegativeChunkLength.vsd", "NegativeChunkLength2.vsd"
};
@@ -108,31 +105,6 @@ public final class TestVisioExtractor {
}
}
- @Test
- public void testMain() throws Exception {
- PrintStream oldOut = System.out;
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- PrintStream capture = new PrintStream(baos);
- System.setOut(capture);
-
- String path = _dgTests.getFile(defFilename).getPath();
- VisioTextExtractor.main(new String[] {path});
-
- // Put things back
- System.setOut(oldOut);
-
- // Check
- capture.flush();
- String text = baos.toString();
- // YK: stdout can contain lots of other stuff if logging is sent to console
- // ( -Dorg.apache.poi.util.POILogger=org.apache.poi.util.SystemOutLogger)
- assertTrue( text.contains(
- "text\nView\n" +
- "Test View\nI am a test view\n" +
- "Some random text, on a page\n"
- ));
- }
-
private VisioTextExtractor openExtractor(String fileName) throws IOException {
try (InputStream is = _dgTests.openResourceAsStream(fileName)) {
return new VisioTextExtractor(is);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org