You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ki...@apache.org on 2018/04/20 12:53:00 UTC
svn commit: r1829653 [1/2] - in /poi: site/src/documentation/content/xdocs/
site/src/documentation/content/xdocs/slideshow/
trunk/src/integrationtest/org/apache/poi/
trunk/src/integrationtest/org/apache/poi/stress/
trunk/src/java/org/apache/poi/ trunk/...
Author: kiwiwings
Date: Fri Apr 20 12:52:59 2018
New Revision: 1829653
URL: http://svn.apache.org/viewvc?rev=1829653&view=rev
Log:
#62319 - Decommission XSLF-/PowerPointExtractor
Modified:
poi/site/src/documentation/content/xdocs/slideshow/quick-guide.xml
poi/site/src/documentation/content/xdocs/status.xml
poi/site/src/documentation/content/xdocs/text-extraction.xml
poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java
poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java
poi/trunk/src/java/org/apache/poi/POIOLE2TextExtractor.java
poi/trunk/src/java/org/apache/poi/POITextExtractor.java
poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java
poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java
poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java
poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShow.java
poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java
poi/trunk/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java
poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java
poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFPlaceholderDetails.java
poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java
poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestHxxFEncryption.java
poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java
poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java
poi/trunk/src/scratchpad/src/org/apache/poi/extractor/OLE2ScratchpadExtractorFactory.java
poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java
poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowFactory.java
poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java
poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java
poi/trunk/test-data/slideshow/SampleShow.pptx
Modified: poi/site/src/documentation/content/xdocs/slideshow/quick-guide.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/slideshow/quick-guide.xml?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/slideshow/quick-guide.xml (original)
+++ poi/site/src/documentation/content/xdocs/slideshow/quick-guide.xml Fri Apr 20 12:52:59 2018
@@ -31,10 +31,9 @@
<body>
<section><title>Basic Text Extraction</title>
<p>For basic text extraction, make use of
-<code>org.apache.poi.hslf.extractor.PowerPointExtractor</code>. It accepts a file or an input
-stream. The <code>getText()</code> method can be used to get the text from the slides, and the <code>getNotes()</code> method can be used to get the text
-from the notes. Finally, <code>getText(true,true)</code> will get the text
-from both.
+ <code>org.apache.poi.sl.extractor.SlideShowExtractor</code>.
+ It accepts a slideshow which can be created from a file or stream via <code>org.apache.poi.sl.usermodel.SlideShowFactory</code>.
+ The <code>getText()</code> method can be used to get the text from the slides.
</p>
</section>
@@ -121,7 +120,7 @@ The paragraph formatting is defined in t
<li><code>org.apache.poi.hslf.usermodel.HSLFTextRun</code>
Holds a run of text, all having the same character stylings. It is possible to modify text, and/or text stylings.
</li>
- <li><code>org.apache.poi.hslf.extractor.PowerPointExtractor</code>
+ <li><code>org.apache.poi.sl.extractor.SlideShowExtractor</code>
Uses the model code to allow extraction of text from files
</li>
<li><code>org.apache.poi.hslf.extractor.QuickButCruddyTextExtractor</code>
Modified: poi/site/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/status.xml?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/status.xml (original)
+++ poi/site/src/documentation/content/xdocs/status.xml Fri Apr 20 12:52:59 2018
@@ -68,6 +68,7 @@
<summary-item>Provide new ooxml-schemas-1.4.jar</summary-item>
</summary>
<actions>
+ <action dev="PD" type="add" fixes-bug="62319" breaks-compatibility="true" module="SL Common">Decommission XSLF-/PowerPointExtractor</action>
<action dev="PD" type="add" fixes-bug="62092" module="SL Common">Text not extracted from grouped text shapes in HSLF</action>
<action dev="PD" type="add" fixes-bug="62159" module="OPC">Support XML signature over windows certificate store</action>
<action dev="PD" type="add" fixes-bug="57369" module="XDDF">Add support for major and minor units on chart axes</action>
Modified: poi/site/src/documentation/content/xdocs/text-extraction.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/text-extraction.xml?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/text-extraction.xml (original)
+++ poi/site/src/documentation/content/xdocs/text-extraction.xml Fri Apr 20 12:52:59 2018
@@ -107,11 +107,11 @@
</section>
<section><title>PowerPoint</title>
- <p>For .ppt files, in scratchpad there is
- <em>org.apache.poi.hslf.extractor.PowerPointExtractor</em>, which
+ <p>For .ppt and .pptx files, there is a common extractor
+ <em>org.apache.poi.sl.extractor.SlideShowExtractor</em>, which
will return text for your slideshow, optionally restricted to just
- slides text or notes text. For .pptx files, the class to use is
- <em>org.apache.poi.xslf.extractor.XSLFPowerPointExtractor</em></p>
+ slides text or notes text. For .ppt you need to add the poi-scratchpad.jar
+ and for .pptx the poi-ooxml.jar and its dependencies are needed</p>
</section>
<section><title>Publisher</title>
Modified: poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java (original)
+++ poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java Fri Apr 20 12:52:59 2018
@@ -330,8 +330,6 @@ public class TestAllFiles {
);
private static final Set<String> IGNORED = unmodifiableHashSet(
- // need JDK8+ - https://bugs.openjdk.java.net/browse/JDK-8038081
- "slideshow/42474-2.ppt",
// OPC handler works / XSSF handler fails
"spreadsheet/57181.xlsm",
"spreadsheet/61300.xls"//intentionally fuzzed -- used to cause infinite loop
Modified: poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java (original)
+++ poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java Fri Apr 20 12:52:59 2018
@@ -24,6 +24,7 @@ import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFSlideShow;
@@ -53,12 +54,19 @@ public class XSLFFileHandler extends Sli
// additionally try the other getText() methods
- try (XSLFPowerPointExtractor extractor = (XSLFPowerPointExtractor) ExtractorFactory.createExtractor(file)) {
+ try (SlideShowExtractor extractor = ExtractorFactory.createExtractor(file)) {
assertNotNull(extractor);
+ extractor.setSlidesByDefault(true);
+ extractor.setNotesByDefault(true);
+ extractor.setMasterByDefault(true);
- assertNotNull(extractor.getText(true, true, true));
- assertEquals("With all options disabled we should not get text",
- "", extractor.getText(false, false, false));
+ assertNotNull(extractor.getText());
+
+ extractor.setSlidesByDefault(false);
+ extractor.setNotesByDefault(false);
+ extractor.setMasterByDefault(false);
+
+ assertEquals("With all options disabled we should not get text", "", extractor.getText());
}
}
Modified: poi/trunk/src/java/org/apache/poi/POIOLE2TextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/POIOLE2TextExtractor.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/POIOLE2TextExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/POIOLE2TextExtractor.java Fri Apr 20 12:52:59 2018
@@ -105,6 +105,7 @@ public abstract class POIOLE2TextExtract
*
* @return the underlying POIDocument
*/
+ @Override
public POIDocument getDocument() {
return document;
}
Modified: poi/trunk/src/java/org/apache/poi/POITextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/POITextExtractor.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/POITextExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/POITextExtractor.java Fri Apr 20 12:52:59 2018
@@ -74,4 +74,9 @@ public abstract class POITextExtractor i
fsToClose.close();
}
}
+
+ /**
+ * @return the processed document
+ */
+ public abstract Object getDocument();
}
Modified: poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java (original)
+++ poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java Fri Apr 20 12:52:59 2018
@@ -115,26 +115,23 @@ public class OLE2ExtractorFactory {
return threadPreferEventExtractors.get();
}
- public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
- // Only ever an OLE2 one from the root of the FS
- return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
+ public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException {
+ return (T)createExtractor(fs.getRoot());
}
- public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException {
- // Only ever an OLE2 one from the root of the FS
- return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
+ public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException {
+ return (T)createExtractor(fs.getRoot());
}
- public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException {
- // Only ever an OLE2 one from the root of the FS
- return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
+ public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException {
+ return (T)createExtractor(fs.getRoot());
}
- public static POITextExtractor createExtractor(InputStream input) throws IOException {
+ public static <T extends POITextExtractor> T createExtractor(InputStream input) throws IOException {
Class<?> cls = getOOXMLClass();
if (cls != null) {
// Use Reflection to get us the full OOXML-enabled version
try {
Method m = cls.getDeclaredMethod("createExtractor", InputStream.class);
- return (POITextExtractor)m.invoke(null, input);
+ return (T)m.invoke(null, input);
} catch (IllegalArgumentException iae) {
throw iae;
} catch (Exception e) {
Modified: poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java (original)
+++ poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java Fri Apr 20 12:52:59 2018
@@ -44,8 +44,30 @@ public class DocumentFactoryHelper {
* @throws IOException If an error occurs while decrypting or if the password does not match
*/
public static InputStream getDecryptedStream(final NPOIFSFileSystem fs, String password)
+ throws IOException {
+ // wrap the stream in a FilterInputStream to close the NPOIFSFileSystem
+ // as well when the resulting OPCPackage is closed
+ return new FilterInputStream(getDecryptedStream(fs.getRoot(), password)) {
+ @Override
+ public void close() throws IOException {
+ fs.close();
+ super.close();
+ }
+ };
+ }
+
+ /**
+ * Wrap the OLE2 data of the DirectoryNode into a decrypted stream by using
+ * the given password.
+ *
+ * @param root The OLE2 directory node for the document
+ * @param password The password, null if the default password should be used
+ * @return A stream for reading the decrypted data
+ * @throws IOException If an error occurs while decrypting or if the password does not match
+ */
+ public static InputStream getDecryptedStream(final DirectoryNode root, String password)
throws IOException {
- EncryptionInfo info = new EncryptionInfo(fs);
+ EncryptionInfo info = new EncryptionInfo(root);
Decryptor d = Decryptor.getInstance(info);
try {
@@ -58,21 +80,11 @@ public class DocumentFactoryHelper {
}
if (passwordCorrect) {
- // wrap the stream in a FilterInputStream to close the NPOIFSFileSystem
- // as well when the resulting OPCPackage is closed
- return new FilterInputStream(d.getDataStream(fs.getRoot())) {
- @Override
- public void close() throws IOException {
- fs.close();
-
- super.close();
- }
- };
+ return d.getDataStream(root);
+ } else if (password != null) {
+ throw new EncryptedDocumentException("Password incorrect");
} else {
- if (password != null)
- throw new EncryptedDocumentException("Password incorrect");
- else
- throw new EncryptedDocumentException("The supplied spreadsheet is protected, but no password was supplied");
+ throw new EncryptedDocumentException("The supplied spreadsheet is protected, but no password was supplied");
}
} catch (GeneralSecurityException e) {
throw new IOException(e);
Modified: poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java Fri Apr 20 12:52:59 2018
@@ -1,3 +1,20 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
package org.apache.poi.sl.extractor;
import java.util.ArrayList;
@@ -49,6 +66,16 @@ public class SlideShowExtractor<
}
/**
+ * Returns the opened document
+ *
+ * @return the opened document
+ */
+ @Override
+ public final Object getDocument() {
+ return slideshow.getPersistDocument();
+ }
+
+ /**
* Should a call to getText() return slide text? Default is yes
*/
public void setSlidesByDefault(final boolean slidesByDefault) {
@@ -219,7 +246,6 @@ public class SlideShowExtractor<
return;
}
for (final P para : paraList) {
- final int oldLen = sb.length();
for (final TextRun tr : para) {
final String str = tr.getRawText().replace("\r", "");
final String newStr;
Modified: poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShow.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShow.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShow.java (original)
+++ poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShow.java Fri Apr 20 12:52:59 2018
@@ -126,4 +126,13 @@ public interface SlideShow<
* @since POI 4.0.0
*/
POITextExtractor getMetadataTextExtractor();
+
+ /**
+ * @return the instance which handles the persisting of the slideshow,
+ * which is either a subclass of {@link org.apache.poi.POIDocument}
+ * or {@link org.apache.poi.POIXMLDocument}
+ *
+ * @since POI 4.0.0
+ */
+ Object getPersistDocument();
}
Modified: poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java (original)
+++ poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java Fri Apr 20 12:52:59 2018
@@ -60,13 +60,40 @@ public class SlideShowFactory {
* @throws IOException if an error occurs while reading the data
*/
public static SlideShow<?,?> create(final NPOIFSFileSystem fs, String password) throws IOException {
- DirectoryNode root = fs.getRoot();
+ return create(fs.getRoot(), password);
+ }
+ /**
+ * Creates a SlideShow from the given DirectoryNode.
+ *
+ * @param root The {@link DirectoryNode} to start reading the document from
+ *
+ * @return The created SlideShow
+ *
+ * @throws IOException if an error occurs while reading the data
+ */
+ public static SlideShow<?,?> create(final DirectoryNode root) throws IOException {
+ return create(root, null);
+ }
+
+
+ /**
+ * Creates a SlideShow from the given DirectoryNode, which may
+ * be password protected
+ *
+ * @param root The {@link DirectoryNode} to start reading the document from
+ * @param password The password that should be used or null if no password is necessary.
+ *
+ * @return The created SlideShow
+ *
+ * @throws IOException if an error occurs while reading the data
+ */
+ public static SlideShow<?,?> create(final DirectoryNode root, String password) throws IOException {
// Encrypted OOXML files go inside OLE2 containers, is this one?
if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
InputStream stream = null;
try {
- stream = DocumentFactoryHelper.getDecryptedStream(fs, password);
+ stream = DocumentFactoryHelper.getDecryptedStream(root, password);
return createXSLFSlideShow(stream);
} finally {
@@ -82,7 +109,7 @@ public class SlideShowFactory {
passwordSet = true;
}
try {
- return createHSLFSlideShow(fs);
+ return createHSLFSlideShow(root);
} finally {
if (passwordSet) {
Biff8EncryptionKey.setCurrentUserPassword(null);
Modified: poi/trunk/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java Fri Apr 20 12:52:59 2018
@@ -68,6 +68,7 @@ public abstract class POIXMLTextExtracto
*
* @return the opened document
*/
+ @Override
public final POIXMLDocument getDocument() {
return _document;
}
Modified: poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java Fri Apr 20 12:52:59 2018
@@ -51,6 +51,7 @@ import org.apache.poi.poifs.filesystem.N
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.NotImplemented;
import org.apache.poi.util.POILogFactory;
@@ -58,6 +59,7 @@ import org.apache.poi.util.POILogger;
import org.apache.poi.util.Removal;
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.xslf.usermodel.XSLFSlideShow;
import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
@@ -127,20 +129,20 @@ public class ExtractorFactory {
return OLE2ExtractorFactory.getPreferEventExtractor();
}
- public static POITextExtractor createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
+ public static <T extends POITextExtractor> T createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
NPOIFSFileSystem fs = null;
try {
fs = new NPOIFSFileSystem(f);
if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
- return createEncryptedOOXMLExtractor(fs);
+ return (T)createEncryptedOOXMLExtractor(fs);
}
- POIOLE2TextExtractor extractor = createExtractor(fs);
+ POITextExtractor extractor = createExtractor(fs);
extractor.setFilesystem(fs);
- return extractor;
+ return (T)extractor;
} catch (OfficeXmlFileException e) {
// ensure file-handle release
IOUtils.closeQuietly(fs);
- return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
+ return (T)createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
} catch (NotOLE2FileException ne) {
// ensure file-handle release
IOUtils.closeQuietly(fs);
@@ -179,7 +181,7 @@ public class ExtractorFactory {
* @throws XmlException If an XML parsing error occurs.
* @throws IllegalArgumentException If no matching file type could be found.
*/
- public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
+ public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
try {
// Check for the normal Office core document
PackageRelationshipCollection core;
@@ -226,13 +228,13 @@ public class ExtractorFactory {
// Is it XSLF?
for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
if ( rel.getContentType().equals( contentType ) ) {
- return new XSLFPowerPointExtractor(pkg);
+ return new SlideShowExtractor(new XMLSlideShow(pkg));
}
}
// special handling for SlideShow-Theme-files,
if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
- return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
+ return new SlideShowExtractor(new XMLSlideShow(pkg));
}
// How about xlsb?
@@ -252,28 +254,28 @@ public class ExtractorFactory {
}
}
- public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
- return OLE2ExtractorFactory.createExtractor(fs);
+ public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+ return createExtractor(fs.getRoot());
}
- public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
- return OLE2ExtractorFactory.createExtractor(fs);
+ public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+ return createExtractor(fs.getRoot());
}
- public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
- return OLE2ExtractorFactory.createExtractor(fs);
+ public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+ return createExtractor(fs.getRoot());
}
- public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
+ public static <T extends POITextExtractor> T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
{
// First, check for OOXML
for (String entryName : poifsDir.getEntryNames()) {
if (entryName.equals("Package")) {
OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
- return createExtractor(pkg);
+ return (T)createExtractor(pkg);
}
}
// If not, ask the OLE2 code to check, with Scratchpad if possible
- return OLE2ExtractorFactory.createExtractor(poifsDir);
+ return (T)OLE2ExtractorFactory.createExtractor(poifsDir);
}
/**
@@ -403,7 +405,7 @@ public class ExtractorFactory {
throw new IllegalStateException("Not yet supported");
}
- private static POIXMLTextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
+ private static POITextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
throws IOException {
String pass = Biff8EncryptionKey.getCurrentUserPassword();
if (pass == null) {
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java Fri Apr 20 12:52:59 2018
@@ -37,7 +37,7 @@ import org.apache.xmlbeans.XmlException;
* @deprecated use {@link SlideShowExtractor}
*/
@Deprecated
-@Removal(version="4.2.0")
+@Removal(version="5.0.0")
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
public static final XSLFRelation[] SUPPORTED_TYPES = new XSLFRelation[]{
XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE,
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java Fri Apr 20 12:52:59 2018
@@ -631,4 +631,9 @@ public class XMLSlideShow extends POIXML
public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
return new POIXMLPropertiesTextExtractor(this);
}
+
+ @Override
+ public Object getPersistDocument() {
+ return this;
+ }
}
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFPlaceholderDetails.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFPlaceholderDetails.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFPlaceholderDetails.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFPlaceholderDetails.java Fri Apr 20 12:52:59 2018
@@ -1,3 +1,20 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
package org.apache.poi.xslf.usermodel;
import static org.apache.poi.xslf.usermodel.XSLFShape.PML_NS;
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java Fri Apr 20 12:52:59 2018
@@ -182,10 +182,18 @@ implements Slide<XSLFShape,XSLFTextParag
*/
public XSLFCommentAuthors getCommentAuthorsPart() {
if(_commentAuthors == null) {
+ // first scan the slide relations
for (POIXMLDocumentPart p : getRelations()) {
if (p instanceof XSLFCommentAuthors) {
_commentAuthors = (XSLFCommentAuthors)p;
return _commentAuthors;
+ }
+ }
+ // then scan the presentation relations
+ for (POIXMLDocumentPart p : getSlideShow().getRelations()) {
+ if (p instanceof XSLFCommentAuthors) {
+ _commentAuthors = (XSLFCommentAuthors)p;
+ return _commentAuthors;
}
}
}
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java Fri Apr 20 12:52:59 2018
@@ -27,16 +27,15 @@ import static org.junit.Assert.fail;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
+import java.util.Locale;
import org.apache.poi.POIDataSamples;
import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.POITextExtractor;
-import org.apache.poi.POIXMLException;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.UnsupportedFileFormatException;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
-import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.OldExcelFormatException;
@@ -44,18 +43,20 @@ import org.apache.poi.hssf.extractor.Eve
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
-import org.junit.BeforeClass;
+import org.apache.xmlbeans.XmlException;
import org.junit.Test;
/**
@@ -65,34 +66,39 @@ public class TestExtractorFactory {
private static final POILogger LOG = POILogFactory.getLogger(TestExtractorFactory.class);
- private static File txt;
+ private static final POIDataSamples ssTests = POIDataSamples.getSpreadSheetInstance();
+ private static final File xls = getFileAndCheck(ssTests, "SampleSS.xls");
+ private static final File xlsx = getFileAndCheck(ssTests, "SampleSS.xlsx");
+ private static final File xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
+ private static final File xltx = getFileAndCheck(ssTests, "test.xltx");
+ private static final File xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
+ private static final File xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
+
+ private static final POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
+ private static final File doc = getFileAndCheck(wpTests, "SampleDoc.doc");
+ private static final File doc6 = getFileAndCheck(wpTests, "Word6.doc");
+ private static final File doc95 = getFileAndCheck(wpTests, "Word95.doc");
+ private static final File docx = getFileAndCheck(wpTests, "SampleDoc.docx");
+ private static final File dotx = getFileAndCheck(wpTests, "test.dotx");
+ private static final File docEmb = getFileAndCheck(wpTests, "word_with_embeded.doc");
+ private static final File docEmbOOXML = getFileAndCheck(wpTests, "word_with_embeded_ooxml.doc");
+
+ private static final POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
+ private static final File ppt = getFileAndCheck(slTests, "SampleShow.ppt");
+ private static final File pptx = getFileAndCheck(slTests, "SampleShow.pptx");
+ private static final File txt = getFileAndCheck(slTests, "SampleShow.txt");
+
+ private static final POIDataSamples olTests = POIDataSamples.getHSMFInstance();
+ private static final File msg = getFileAndCheck(olTests, "quick.msg");
+ private static final File msgEmb = getFileAndCheck(olTests, "attachment_test_msg.msg");
+ private static final File msgEmbMsg = getFileAndCheck(olTests, "attachment_msg_pdf.msg");
+
+ private static final POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
+ private static final File vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd");
+ private static final File vsdx = getFileAndCheck(dgTests, "test.vsdx");
- private static File xls;
- private static File xlsx;
- private static File xlsxStrict;
- private static File xltx;
- private static File xlsEmb;
- private static File xlsb;
-
- private static File doc;
- private static File doc6;
- private static File doc95;
- private static File docx;
- private static File dotx;
- private static File docEmb;
- private static File docEmbOOXML;
-
- private static File ppt;
- private static File pptx;
-
- private static File msg;
- private static File msgEmb;
- private static File msgEmbMsg;
-
- private static File vsd;
- private static File vsdx;
-
- private static File pub;
+ private static POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
+ private static File pub = getFileAndCheck(pubTests, "Simple.pub");
private static File getFileAndCheck(POIDataSamples samples, String name) {
File file = samples.getFile(name);
@@ -104,595 +110,133 @@ public class TestExtractorFactory {
return file;
}
- @BeforeClass
- public static void setUp() throws Exception {
+ private static final Object[] TEST_SET = {
+ "Excel", xls, ExcelExtractor.class, 200,
+ "Excel - xlsx", xlsx, XSSFExcelExtractor.class, 200,
+ "Excel - xltx", xltx, XSSFExcelExtractor.class, -1,
+ "Excel - xlsb", xlsb, XSSFBEventBasedExcelExtractor.class, -1,
+ "Word", doc, WordExtractor.class, 120,
+ "Word - docx", docx, XWPFWordExtractor.class, 120,
+ "Word - dotx", dotx, XWPFWordExtractor.class, -1,
+ "Word 6", doc6, Word6Extractor.class, 20,
+ "Word 95", doc95, Word6Extractor.class, 120,
+ "PowerPoint", ppt, SlideShowExtractor.class, 120,
+ "PowerPoint - pptx", pptx, SlideShowExtractor.class, 120,
+ "Visio", vsd, VisioTextExtractor.class, 50,
+ "Visio - vsdx", vsdx, XDGFVisioExtractor.class, 20,
+ "Publisher", pub, PublisherTextExtractor.class, 50,
+ "Outlook msg", msg, OutlookTextExtactor.class, 50,
+
+ // TODO Support OOXML-Strict, see bug #57699
+ // xlsxStrict
+ };
- POIDataSamples ssTests = POIDataSamples.getSpreadSheetInstance();
- xls = getFileAndCheck(ssTests, "SampleSS.xls");
- xlsx = getFileAndCheck(ssTests, "SampleSS.xlsx");
- xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
- xltx = getFileAndCheck(ssTests, "test.xltx");
- xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
- xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
-
- POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
- doc = getFileAndCheck(wpTests, "SampleDoc.doc");
- doc6 = getFileAndCheck(wpTests, "Word6.doc");
- doc95 = getFileAndCheck(wpTests, "Word95.doc");
- docx = getFileAndCheck(wpTests, "SampleDoc.docx");
- dotx = getFileAndCheck(wpTests, "test.dotx");
- docEmb = getFileAndCheck(wpTests, "word_with_embeded.doc");
- docEmbOOXML = getFileAndCheck(wpTests, "word_with_embeded_ooxml.doc");
-
- POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
- ppt = getFileAndCheck(slTests, "SampleShow.ppt");
- pptx = getFileAndCheck(slTests, "SampleShow.pptx");
- txt = getFileAndCheck(slTests, "SampleShow.txt");
-
- POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
- vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd");
- vsdx = getFileAndCheck(dgTests, "test.vsdx");
-
- POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
- pub = getFileAndCheck(pubTests, "Simple.pub");
-
- POIDataSamples olTests = POIDataSamples.getHSMFInstance();
- msg = getFileAndCheck(olTests, "quick.msg");
- msgEmb = getFileAndCheck(olTests, "attachment_test_msg.msg");
- msgEmbMsg = getFileAndCheck(olTests, "attachment_msg_pdf.msg");
+ @FunctionalInterface
+ interface FunctionEx<T, R> {
+ R apply(T t) throws IOException, OpenXML4JException, XmlException;
}
+
@Test
public void testFile() throws Exception {
- // Excel
- POITextExtractor xlsExtractor = ExtractorFactory.createExtractor(xls);
- assertNotNull("Had empty extractor for " + xls, xlsExtractor);
- assertTrue("Expected instanceof ExcelExtractor, but had: " + xlsExtractor.getClass(),
- xlsExtractor
- instanceof ExcelExtractor
- );
- assertTrue(
- xlsExtractor.getText().length() > 200
- );
- xlsExtractor.close();
-
- POITextExtractor extractor = ExtractorFactory.createExtractor(xlsx);
- assertTrue(
- extractor.getClass().getName(),
- extractor
- instanceof XSSFExcelExtractor
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(xlsx);
- assertTrue(
- extractor.getText().length() > 200
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(xltx);
- assertTrue(
- extractor.getClass().getName(),
- extractor
- instanceof XSSFExcelExtractor
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(xlsb);
- assertContains(extractor.getText(), "test");
- extractor.close();
-
-
- extractor = ExtractorFactory.createExtractor(xltx);
- assertContains(extractor.getText(), "test");
- extractor.close();
-
- // TODO Support OOXML-Strict, see bug #57699
- try {
- /*extractor =*/ ExtractorFactory.createExtractor(xlsxStrict);
- fail("OOXML-Strict isn't yet supported");
- } catch (POIXMLException e) {
- // Expected, for now
+ for (int i = 0; i < TEST_SET.length; i += 4) {
+ try (POITextExtractor ext = ExtractorFactory.createExtractor((File) TEST_SET[i + 1])) {
+ testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
+ }
}
-// extractor = ExtractorFactory.createExtractor(xlsxStrict);
-// assertTrue(
-// extractor
-// instanceof XSSFExcelExtractor
-// );
-// extractor.close();
-//
-// extractor = ExtractorFactory.createExtractor(xlsxStrict);
-// assertTrue(
-// extractor.getText().contains("test")
-// );
-// extractor.close();
-
-
- // Word
- extractor = ExtractorFactory.createExtractor(doc);
- assertTrue(
- extractor
- instanceof WordExtractor
- );
- assertTrue(
- extractor.getText().length() > 120
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(doc6);
- assertTrue(
- extractor
- instanceof Word6Extractor
- );
- assertTrue(
- extractor.getText().length() > 20
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(doc95);
- assertTrue(
- extractor
- instanceof Word6Extractor
- );
- assertTrue(
- extractor.getText().length() > 120
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(docx);
- assertTrue(
- extractor instanceof XWPFWordExtractor
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(docx);
- assertTrue(
- extractor.getText().length() > 120
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(dotx);
- assertTrue(
- extractor instanceof XWPFWordExtractor
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(dotx);
- assertContains(extractor.getText(), "Test");
- extractor.close();
-
- // PowerPoint (PPT)
- extractor = ExtractorFactory.createExtractor(ppt);
- assertTrue(
- extractor
- instanceof PowerPointExtractor
- );
- assertTrue(
- extractor.getText().length() > 120
- );
- extractor.close();
-
- // PowerPoint (PPTX)
- extractor = ExtractorFactory.createExtractor(pptx);
- assertTrue(
- extractor
- instanceof XSLFPowerPointExtractor
- );
- assertTrue(
- extractor.getText().length() > 120
- );
- extractor.close();
-
- // Visio - binary
- extractor = ExtractorFactory.createExtractor(vsd);
- assertTrue(
- extractor
- instanceof VisioTextExtractor
- );
- assertTrue(
- extractor.getText().length() > 50
- );
- extractor.close();
-
- // Visio - vsdx
- extractor = ExtractorFactory.createExtractor(vsdx);
- assertTrue(
- extractor
- instanceof XDGFVisioExtractor
- );
- assertTrue(
- extractor.getText().length() > 20
- );
- extractor.close();
-
- // Publisher
- extractor = ExtractorFactory.createExtractor(pub);
- assertTrue(
- extractor
- instanceof PublisherTextExtractor
- );
- assertTrue(
- extractor.getText().length() > 50
- );
- extractor.close();
-
- // Outlook msg
- extractor = ExtractorFactory.createExtractor(msg);
- assertTrue(
- extractor
- instanceof OutlookTextExtactor
- );
- assertTrue(
- extractor.getText().length() > 50
- );
- extractor.close();
+ }
+ @Test(expected = IllegalArgumentException.class)
+ public void testFileInvalid() throws Exception {
// Text
- try {
- ExtractorFactory.createExtractor(txt);
- fail("expected IllegalArgumentException");
- } catch(IllegalArgumentException e) {
- // Good
- }
+ try (POITextExtractor te = ExtractorFactory.createExtractor(txt)) {}
}
@Test
public void testInputStream() throws Exception {
- // Excel
- POITextExtractor extractor = ExtractorFactory.createExtractor(new FileInputStream(xls));
- assertTrue(
- extractor
- instanceof ExcelExtractor
- );
- assertTrue(
- extractor.getText().length() > 200
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(new FileInputStream(xlsx));
- assertTrue(
- extractor.getClass().getName(),
- extractor
- instanceof XSSFExcelExtractor
- );
- assertTrue(
- extractor.getText().length() > 200
- );
- // TODO Support OOXML-Strict, see bug #57699
-// assertTrue(
-// ExtractorFactory.createExtractor(new FileInputStream(xlsxStrict))
-// instanceof XSSFExcelExtractor
-// );
-// assertTrue(
-// ExtractorFactory.createExtractor(new FileInputStream(xlsxStrict)).getText().length() > 200
-// );
- extractor.close();
-
- // Word
- extractor = ExtractorFactory.createExtractor(new FileInputStream(doc));
- assertTrue(
- extractor.getClass().getName(),
- extractor
- instanceof WordExtractor
- );
- assertTrue(
- extractor.getText().length() > 120
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(new FileInputStream(doc6));
- assertTrue(
- extractor.getClass().getName(),
- extractor
- instanceof Word6Extractor
- );
- assertTrue(
- extractor.getText().length() > 20
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(new FileInputStream(doc95));
- assertTrue(
- extractor.getClass().getName(),
- extractor
- instanceof Word6Extractor
- );
- assertTrue(
- extractor.getText().length() > 120
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(new FileInputStream(docx));
- assertTrue(
- extractor
- instanceof XWPFWordExtractor
- );
- assertTrue(
- extractor.getText().length() > 120
- );
- extractor.close();
-
- // PowerPoint
- extractor = ExtractorFactory.createExtractor(new FileInputStream(ppt));
- assertTrue(
- extractor
- instanceof PowerPointExtractor
- );
- assertTrue(
- extractor.getText().length() > 120
- );
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(new FileInputStream(pptx));
- assertTrue(
- extractor
- instanceof XSLFPowerPointExtractor
- );
- assertTrue(
- extractor.getText().length() > 120
- );
- extractor.close();
-
- // Visio
- extractor = ExtractorFactory.createExtractor(new FileInputStream(vsd));
- assertTrue(
- extractor
- instanceof VisioTextExtractor
- );
- assertTrue(
- extractor.getText().length() > 50
- );
- extractor.close();
-
- // Visio - vsdx
- extractor = ExtractorFactory.createExtractor(new FileInputStream(vsdx));
- assertTrue(
- extractor
- instanceof XDGFVisioExtractor
- );
- assertTrue(
- extractor.getText().length() > 20
- );
- extractor.close();
-
- // Publisher
- extractor = ExtractorFactory.createExtractor(new FileInputStream(pub));
- assertTrue(
- extractor
- instanceof PublisherTextExtractor
- );
- assertTrue(
- extractor.getText().length() > 50
- );
- extractor.close();
-
- // Outlook msg
- extractor = ExtractorFactory.createExtractor(new FileInputStream(msg));
- assertTrue(
- extractor
- instanceof OutlookTextExtactor
- );
- assertTrue(
- extractor.getText().length() > 50
- );
- extractor.close();
+ testStream((f) -> ExtractorFactory.createExtractor(f), true);
+ }
- // Text
- try (FileInputStream stream = new FileInputStream(txt)) {
- ExtractorFactory.createExtractor(stream);
- fail("expected IllegalArgumentException");
- } catch(IllegalArgumentException e) {
- // Good
- }
+ @Test(expected = IllegalArgumentException.class)
+ public void testInputStreamInvalid() throws Exception {
+ testInvalid((f) -> ExtractorFactory.createExtractor(f));
}
@Test
public void testPOIFS() throws Exception {
- // Excel
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))
- instanceof ExcelExtractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))).getText().length() > 200
- );
-
- // Word
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc)))
- instanceof WordExtractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc))).getText().length() > 120
- );
-
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc6)))
- instanceof Word6Extractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc6))).getText().length() > 20
- );
-
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc95)))
- instanceof Word6Extractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc95))).getText().length() > 120
- );
-
- // PowerPoint
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt)))
- instanceof PowerPointExtractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120
- );
-
- // Visio
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd)))
- instanceof VisioTextExtractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
- );
-
- // Publisher
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub)))
- instanceof PublisherTextExtractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub))).getText().length() > 50
- );
-
- // Outlook msg
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg)))
- instanceof OutlookTextExtactor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg))).getText().length() > 50
- );
-
- // Text
- try {
- ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(txt)));
- fail("expected IllegalArgumentException");
- } catch(IOException e) {
- // Good
- }
+ testStream((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)), false);
}
+ @Test(expected = IOException.class)
+ public void testPOIFSInvalid() throws Exception {
+ testInvalid((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)));
+ }
@Test
public void testOPOIFS() throws Exception {
- // Excel
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(xls)))
- instanceof ExcelExtractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(xls))).getText().length() > 200
- );
-
- // Word
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc)))
- instanceof WordExtractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc))).getText().length() > 120
- );
-
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc6)))
- instanceof Word6Extractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc6))).getText().length() > 20
- );
-
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc95)))
- instanceof Word6Extractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc95))).getText().length() > 120
- );
+ testStream((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f)), false);
+ }
- // PowerPoint
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(ppt)))
- instanceof PowerPointExtractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120
- );
+ @Test(expected = IOException.class)
+ public void testOPOIFSInvalid() throws Exception {
+ testInvalid((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f)));
+ }
- // Visio
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(vsd)))
- instanceof VisioTextExtractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
- );
- // Publisher
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(pub)))
- instanceof PublisherTextExtractor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(pub))).getText().length() > 50
- );
+ private void testStream(final FunctionEx<FileInputStream, POITextExtractor> poifsIS, final boolean loadOOXML)
+ throws IOException, OpenXML4JException, XmlException {
+ for (int i = 0; i < TEST_SET.length; i += 4) {
+ File testFile = (File) TEST_SET[i + 1];
+ if (!loadOOXML && (testFile.getName().endsWith("x") || testFile.getName().endsWith("xlsb"))) {
+ continue;
+ }
+ try (FileInputStream fis = new FileInputStream(testFile);
+ POITextExtractor ext = poifsIS.apply(fis)) {
+ testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
+ } catch (IllegalArgumentException e) {
+ fail("failed to process "+testFile);
+ }
+ }
+ }
- // Outlook msg
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(msg)))
- instanceof OutlookTextExtactor
- );
- assertTrue(
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(msg))).getText().length() > 50
- );
+ private void testExtractor(final POITextExtractor ext, final String testcase, final Class extrClass, final Integer minLength) {
+ assertTrue("invalid extractor for " + testcase, extrClass.isInstance(ext));
+ final String actual = ext.getText();
+ if (minLength == -1) {
+ assertContains(actual.toLowerCase(Locale.ROOT), "test");
+ } else {
+ assertTrue("extracted content too short for " + testcase, actual.length() > minLength);
+ }
+ }
+ private void testInvalid(FunctionEx<FileInputStream, POITextExtractor> poifs) throws IOException, OpenXML4JException, XmlException {
// Text
- try {
- ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(txt)));
- fail("expected IllegalArgumentException");
- } catch(IOException e) {
- // Good
+ try (FileInputStream fis = new FileInputStream(txt);
+ POITextExtractor te = poifs.apply(fis)) {
}
}
@Test
public void testPackage() throws Exception {
- // Excel
- POIXMLTextExtractor extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
- assertTrue(extractor instanceof XSSFExcelExtractor);
- extractor.close();
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
- assertTrue(extractor.getText().length() > 200);
- extractor.close();
-
- // Word
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
- assertTrue(extractor instanceof XWPFWordExtractor);
- extractor.close();
-
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
- assertTrue(extractor.getText().length() > 120);
- extractor.close();
-
- // PowerPoint
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
- assertTrue(extractor instanceof XSLFPowerPointExtractor);
- extractor.close();
+ for (int i = 0; i < TEST_SET.length; i += 4) {
+ final File testFile = (File) TEST_SET[i + 1];
+ if (!testFile.getName().endsWith("x")) {
+ continue;
+ }
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
- assertTrue(extractor.getText().length() > 120);
- extractor.close();
-
- // Visio
- extractor = ExtractorFactory.createExtractor(OPCPackage.open(vsdx.toString()));
- assertTrue(extractor instanceof XDGFVisioExtractor);
- assertTrue(extractor.getText().length() > 20);
- extractor.close();
+ try (final OPCPackage pkg = OPCPackage.open(testFile, PackageAccess.READ);
+ final POITextExtractor ext = ExtractorFactory.createExtractor(pkg)) {
+ testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
+ pkg.revert();
+ }
+ }
+ }
+ @Test(expected = UnsupportedFileFormatException.class)
+ public void testPackageInvalid() throws Exception {
// Text
- try {
- ExtractorFactory.createExtractor(OPCPackage.open(txt.toString()));
- fail("TestExtractorFactory.testPackage() failed on " + txt);
- } catch(UnsupportedFileFormatException e) {
- // Good
- } catch (Exception e) {
- LOG.log(POILogger.WARN, "TestExtractorFactory.testPackage() failed on " + txt);
- throw e;
- }
+ try (final OPCPackage pkg = OPCPackage.open(txt, PackageAccess.READ);
+ final POITextExtractor te = ExtractorFactory.createExtractor(pkg)) {}
}
@Test
@@ -781,142 +325,49 @@ public class TestExtractorFactory {
* does poifs embedded, but will do ooxml ones
* at some point.
*/
- @SuppressWarnings("deprecation")
@Test
public void testEmbedded() throws Exception {
- POIOLE2TextExtractor ext;
- POITextExtractor[] embeds;
-
- // No embeddings
- ext = (POIOLE2TextExtractor)
- ExtractorFactory.createExtractor(xls);
- embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
- assertEquals(0, embeds.length);
- ext.close();
-
- // No embeddings
- ext = (POIOLE2TextExtractor)
- ExtractorFactory.createExtractor(xls);
- embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext);
- assertEquals(0, embeds.length);
- ext.close();
-
- // Excel
- ext = (POIOLE2TextExtractor)
- ExtractorFactory.createExtractor(xlsEmb);
- embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
- assertNotNull(embeds);
- ext.close();
-
- // Excel
- ext = (POIOLE2TextExtractor)
- ExtractorFactory.createExtractor(xlsEmb);
- embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext);
-
- assertEquals(6, embeds.length);
- int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX;
- for (POITextExtractor embed : embeds) {
- assertTrue(embed.getText().length() > 20);
-
- if (embed instanceof PowerPointExtractor) numPpt++;
- else if (embed instanceof ExcelExtractor) numXls++;
- else if (embed instanceof WordExtractor) numWord++;
- else if (embed instanceof OutlookTextExtactor) numMsg++;
- }
- assertEquals(2, numPpt);
- assertEquals(2, numXls);
- assertEquals(2, numWord);
- assertEquals(0, numMsg);
- ext.close();
-
- // Word
- ext = (POIOLE2TextExtractor)
- ExtractorFactory.createExtractor(docEmb);
- embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-
- numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
- assertEquals(4, embeds.length);
- for (POITextExtractor embed : embeds) {
- assertTrue(embed.getText().length() > 20);
- if (embed instanceof PowerPointExtractor) numPpt++;
- else if (embed instanceof ExcelExtractor) numXls++;
- else if (embed instanceof WordExtractor) numWord++;
- else if (embed instanceof OutlookTextExtactor) numMsg++;
- }
- assertEquals(1, numPpt);
- assertEquals(2, numXls);
- assertEquals(1, numWord);
- assertEquals(0, numMsg);
- ext.close();
-
- // Word which contains an OOXML file
- ext = (POIOLE2TextExtractor)
- ExtractorFactory.createExtractor(docEmbOOXML);
- embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-
- numWord = 0; numXls = 0; numPpt = 0; numMsg = 0; numWordX = 0;
- assertEquals(3, embeds.length);
- for (POITextExtractor embed : embeds) {
- assertTrue(embed.getText().length() > 20);
- if (embed instanceof PowerPointExtractor) numPpt++;
- else if (embed instanceof ExcelExtractor) numXls++;
- else if (embed instanceof WordExtractor) numWord++;
- else if (embed instanceof OutlookTextExtactor) numMsg++;
- else if (embed instanceof XWPFWordExtractor) numWordX++;
- }
- assertEquals(1, numPpt);
- assertEquals(1, numXls);
- assertEquals(0, numWord);
- assertEquals(1, numWordX);
- assertEquals(0, numMsg);
- ext.close();
-
- // Outlook
- ext = (OutlookTextExtactor)
- ExtractorFactory.createExtractor(msgEmb);
- embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-
- numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
- assertEquals(1, embeds.length);
- for (POITextExtractor embed : embeds) {
- assertTrue(embed.getText().length() > 20);
- if (embed instanceof PowerPointExtractor) numPpt++;
- else if (embed instanceof ExcelExtractor) numXls++;
- else if (embed instanceof WordExtractor) numWord++;
- else if (embed instanceof OutlookTextExtactor) numMsg++;
- }
- assertEquals(0, numPpt);
- assertEquals(0, numXls);
- assertEquals(1, numWord);
- assertEquals(0, numMsg);
- ext.close();
-
- // Outlook with another outlook file in it
- ext = (OutlookTextExtactor)
- ExtractorFactory.createExtractor(msgEmbMsg);
- embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-
- numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
- assertEquals(1, embeds.length);
- for (POITextExtractor embed : embeds) {
- assertTrue(embed.getText().length() > 20);
- if (embed instanceof PowerPointExtractor) numPpt++;
- else if (embed instanceof ExcelExtractor) numXls++;
- else if (embed instanceof WordExtractor) numWord++;
- else if (embed instanceof OutlookTextExtactor) numMsg++;
+ final Object[] testObj = {
+ "No embeddings", xls, "0-0-0-0-0-0",
+ "Excel", xlsEmb, "6-2-2-2-0-0",
+ "Word", docEmb, "4-1-2-1-0-0",
+ "Word which contains an OOXML file", docEmbOOXML, "3-0-1-1-0-1",
+ "Outlook", msgEmb, "1-1-0-0-0-0",
+ "Outlook with another outlook file in it", msgEmbMsg, "1-0-0-0-1-0",
+ };
+
+ for (int i=0; i<testObj.length; i+=3) {
+ try (final POIOLE2TextExtractor ext = ExtractorFactory.createExtractor((File)testObj[i+1])) {
+ final POITextExtractor[] embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext);
+
+ int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX = 0;
+ for (POITextExtractor embed : embeds) {
+ assertTrue(embed.getText().length() > 20);
+ if (embed instanceof SlideShowExtractor) {
+ numPpt++;
+ } else if (embed instanceof ExcelExtractor) {
+ numXls++;
+ } else if (embed instanceof WordExtractor) {
+ numWord++;
+ } else if (embed instanceof OutlookTextExtactor) {
+ numMsg++;
+ } else if (embed instanceof XWPFWordExtractor) {
+ numWordX++;
+ }
+ }
+
+ final String actual = embeds.length+"-"+numWord+"-"+numXls+"-"+numPpt+"-"+numMsg+"-"+numWordX;
+ final String expected = (String)testObj[i+2];
+ assertEquals("invalid number of embeddings - "+testObj[i], expected, actual);
+ }
}
- assertEquals(0, numPpt);
- assertEquals(0, numXls);
- assertEquals(0, numWord);
- assertEquals(1, numMsg);
- ext.close();
// TODO - PowerPoint
// TODO - Publisher
// TODO - Visio
}
- private static final String[] EXPECTED_FAILURES = new String[] {
+ private static final String[] EXPECTED_FAILURES = {
// password protected files
"spreadsheet/password.xls",
"spreadsheet/protected_passtika.xlsx",
@@ -1018,37 +469,26 @@ public class TestExtractorFactory {
* #59074 - Excel 95 files should give a helpful message, not just
* "No supported documents found in the OLE2 stream"
*/
- @Test
+ @Test(expected = OldExcelFormatException.class)
public void bug59074() throws Exception {
- try {
- ExtractorFactory.createExtractor(
- POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
- fail("Old excel formats not supported via ExtractorFactory");
- } catch (OldExcelFormatException e) {
- // expected here
- }
+ ExtractorFactory.createExtractor(
+ POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
}
@SuppressWarnings("deprecation")
- @Test
- public void testGetEmbeddedFromXMLExtractor() {
- try {
- // currently not implemented
- ExtractorFactory.getEmbededDocsTextExtractors((POIXMLTextExtractor)null);
- fail("Unsupported currently");
- } catch (IllegalStateException e) {
- // expected here
- }
+ @Test(expected = IllegalStateException.class)
+ public void testGetEmbedFromXMLExtractor() {
+ // currently not implemented
+ ExtractorFactory.getEmbededDocsTextExtractors((POIXMLTextExtractor) null);
+ }
- try {
- // currently not implemented
- ExtractorFactory.getEmbeddedDocsTextExtractors((POIXMLTextExtractor)null);
- fail("Unsupported currently");
- } catch (IllegalStateException e) {
- // expected here
- }
+ @SuppressWarnings("deprecation")
+ @Test(expected = IllegalStateException.class)
+ public void testGetEmbeddedFromXMLExtractor() {
+ // currently not implemented
+ ExtractorFactory.getEmbeddedDocsTextExtractors((POIXMLTextExtractor)null);
}
-
+
// This bug is currently open. This test will fail with "expected error not thrown" when the bug has been fixed.
// When this happens, change this from @Test(expected=...) to @Test
// bug 45565: text within TextBoxes is extracted by ExcelExtractor and WordExtractor
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestHxxFEncryption.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestHxxFEncryption.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestHxxFEncryption.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestHxxFEncryption.java Fri Apr 20 12:52:59 2018
@@ -120,10 +120,10 @@ public class TestHxxFEncryption {
public void newPassword(String newPass) throws IOException, OpenXML4JException, XmlException {
Biff8EncryptionKey.setCurrentUserPassword(password);
File f = sampleDir.getFile(file);
- POIOLE2TextExtractor te1 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(f);
+ POITextExtractor te1 = ExtractorFactory.createExtractor(f);
Biff8EncryptionKey.setCurrentUserPassword(newPass);
ByteArrayOutputStream bos = new ByteArrayOutputStream();
- POIDocument doc = te1.getDocument();
+ POIDocument doc = (POIDocument)te1.getDocument();
doc.write(bos);
doc.close();
te1.close();
@@ -140,25 +140,25 @@ public class TestHxxFEncryption {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
Biff8EncryptionKey.setCurrentUserPassword(password);
File f = sampleDir.getFile(file);
- POIOLE2TextExtractor te1 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(f);
+ POITextExtractor te1 = ExtractorFactory.createExtractor(f);
// first remove encryption
Biff8EncryptionKey.setCurrentUserPassword(null);
- POIDocument doc = te1.getDocument();
+ POIDocument doc = (POIDocument)te1.getDocument();
doc.write(bos);
doc.close();
te1.close();
// then use default setting, which is cryptoapi
String newPass = "newPass";
- POIOLE2TextExtractor te2 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
+ POITextExtractor te2 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
Biff8EncryptionKey.setCurrentUserPassword(newPass);
- doc = te2.getDocument();
+ doc = (POIDocument)te2.getDocument();
bos.reset();
doc.write(bos);
doc.close();
te2.close();
// and finally update cryptoapi setting
- POIOLE2TextExtractor te3 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
- doc = te3.getDocument();
+ POITextExtractor te3 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
+ doc = (POIDocument)te3.getDocument();
// need to cache data (i.e. read all data) before changing the key size
if (doc instanceof HSLFSlideShowImpl) {
HSLFSlideShowImpl hss = (HSLFSlideShowImpl)doc;
@@ -175,8 +175,8 @@ public class TestHxxFEncryption {
doc.close();
te3.close();
// check the setting
- POIOLE2TextExtractor te4 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
- doc = te4.getDocument();
+ POITextExtractor te4 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
+ doc = (POIDocument)te4.getDocument();
ei = doc.getEncryptionInfo();
assertNotNull(ei);
assertTrue(ei.getHeader() instanceof CryptoAPIEncryptionHeader);
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java Fri Apr 20 12:52:59 2018
@@ -50,6 +50,7 @@ import org.apache.poi.openxml4j.opc.OPCP
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.sl.draw.DrawPaint;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.sl.usermodel.PaintStyle;
import org.apache.poi.sl.usermodel.PaintStyle.SolidPaint;
import org.apache.poi.sl.usermodel.PaintStyle.TexturePaint;
@@ -221,28 +222,27 @@ public class TestXSLFBugs {
* rID2 -> slide3.xml
*/
@Test
- public void bug54916() throws Exception {
- XMLSlideShow ss = XSLFTestDataSamples.openSampleDocument("OverlappingRelations.pptx");
- XSLFSlide slide;
-
- // Should find 4 slides
- assertEquals(4, ss.getSlides().size());
-
- // Check the text, to see we got them in order
- slide = ss.getSlides().get(0);
- assertContains(getSlideText(slide), "POI cannot read this");
-
- slide = ss.getSlides().get(1);
- assertContains(getSlideText(slide), "POI can read this");
- assertContains(getSlideText(slide), "Has a relationship to another slide");
+ public void bug54916() throws IOException {
+ try (XMLSlideShow ss = XSLFTestDataSamples.openSampleDocument("OverlappingRelations.pptx")) {
+ XSLFSlide slide;
+
+ // Should find 4 slides
+ assertEquals(4, ss.getSlides().size());
+
+ // Check the text, to see we got them in order
+ slide = ss.getSlides().get(0);
+ assertContains(getSlideText(ss, slide), "POI cannot read this");
+
+ slide = ss.getSlides().get(1);
+ assertContains(getSlideText(ss, slide), "POI can read this");
+ assertContains(getSlideText(ss, slide), "Has a relationship to another slide");
- slide = ss.getSlides().get(2);
- assertContains(getSlideText(slide), "POI can read this");
+ slide = ss.getSlides().get(2);
+ assertContains(getSlideText(ss, slide), "POI can read this");
- slide = ss.getSlides().get(3);
- assertContains(getSlideText(slide), "POI can read this");
-
- ss.close();
+ slide = ss.getSlides().get(3);
+ assertContains(getSlideText(ss, slide), "POI can read this");
+ }
}
/**
@@ -311,8 +311,15 @@ public class TestXSLFBugs {
ss.close();
}
- protected String getSlideText(XSLFSlide slide) {
- return XSLFPowerPointExtractor.getText(slide, true, false, false);
+ protected String getSlideText(XMLSlideShow ppt, XSLFSlide slide) throws IOException {
+ try (SlideShowExtractor extr = new SlideShowExtractor(ppt)) {
+ // do not auto-close the slideshow
+ extr.setFilesystem(null);
+ extr.setSlidesByDefault(true);
+ extr.setNotesByDefault(false);
+ extr.setMasterByDefault(false);
+ return extr.getText(slide);
+ }
}
@Test
@@ -458,7 +465,7 @@ public class TestXSLFBugs {
for (int i = 0; i < slideTexts.length; i++) {
XSLFSlide slide = ss.getSlides().get(i);
- assertContains(getSlideText(slide), slideTexts[i]);
+ assertContains(getSlideText(ss, slide), slideTexts[i]);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org