You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/02/02 21:20:02 UTC
tika git commit: TIKA-1799: upgrade to POI 3.14-beta1 for 2.x branch
Repository: tika
Updated Branches:
refs/heads/2.x 9fb7fa9ab -> de9e28cfa
TIKA-1799: upgrade to POI 3.14-beta1 for 2.x branch
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/de9e28cf
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/de9e28cf
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/de9e28cf
Branch: refs/heads/2.x
Commit: de9e28cfadef2b8707807175a8f0cae1cc8dedff
Parents: 9fb7fa9
Author: tballison <ta...@mitre.org>
Authored: Tue Feb 2 15:19:54 2016 -0500
Committer: tballison <ta...@mitre.org>
Committed: Tue Feb 2 15:19:54 2016 -0500
----------------------------------------------------------------------
CHANGES.txt | 2 ++
tika-bundle/pom.xml | 5 +++--
tika-parser-modules/pom.xml | 2 +-
.../microsoft/ooxml/XSLFPowerPointExtractorDecorator.java | 9 ++++-----
.../apache/tika/parser/microsoft/PowerPointParserTest.java | 4 ++--
5 files changed, 12 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index e452ce0..b2e88e2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -10,6 +10,8 @@ Release 2.0 - Future Development
Release 1.13 - ???
+ * Upgrade to POI 3.14-beta1 (TIKA-1799).
+
* Upgrade to PDFBox 1.8.11 (TIKA-1830).
Release 1.12 - 01/24/2016
http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/tika-bundle/pom.xml
----------------------------------------------------------------------
diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml
index 05cc1d6..f9aa71b 100644
--- a/tika-bundle/pom.xml
+++ b/tika-bundle/pom.xml
@@ -129,6 +129,7 @@
commons-io, commons-exec, junrar,
pdfbox,fontbox,jempbox,bcmail-jdk15on,bcprov-jdk15on,bcpkix-jdk15on,
poi,poi-scratchpad,poi-ooxml,poi-ooxml-schemas,
+ curvesapi,
xmlbeans,
jackcess,
commons-lang,
@@ -264,8 +265,8 @@
org.xml.sax.ext;resolution:=optional,
org.xml.sax.helpers;resolution:=optional,
org.xmlpull.v1;resolution:=optional,
- schemasMicrosoftComOfficePowerpoint;resolution:=optional,
- schemasMicrosoftComOfficeWord;resolution:=optional,
+ com.microsoft.schemas.office.powerpoint;resolution:=optional,
+ com.microsoft.schemas.office.word;resolution:=optional,
sun.misc;resolution:=optional,
ucar.units;resolution:=optional,
ucar.httpservices;resolution:=optional,
http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/tika-parser-modules/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml
index 196ce58..932a058 100644
--- a/tika-parser-modules/pom.xml
+++ b/tika-parser-modules/pom.xml
@@ -35,7 +35,7 @@
<url>http://tika.apache.org/</url>
<properties>
- <poi.version>3.13</poi.version>
+ <poi.version>3.14-beta1</poi.version>
<!-- NOTE: sync codec version with POI -->
<codec.version>1.9</codec.version>
<pdfbox.version>1.8.11</pdfbox.version>
http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
index 53cb6c1..71469ce 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
@@ -27,9 +27,8 @@ import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.openxml4j.opc.TargetMode;
-import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.sl.usermodel.SimpleShape;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
-import org.apache.poi.xslf.usermodel.Placeholder;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFCommentAuthors;
import org.apache.poi.xslf.usermodel.XSLFComments;
@@ -43,6 +42,7 @@ import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFSheet;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFSlideLayout;
+import org.apache.poi.xslf.usermodel.XSLFSlideShow;
import org.apache.poi.xslf.usermodel.XSLFTable;
import org.apache.poi.xslf.usermodel.XSLFTableCell;
import org.apache.poi.xslf.usermodel.XSLFTableRow;
@@ -151,7 +151,7 @@ public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
for (XSLFShape sh : shapes) {
if (sh instanceof XSLFTextShape) {
XSLFTextShape txt = (XSLFTextShape) sh;
- Placeholder ph = txt.getTextType();
+ SimpleShape.Placeholder ph = txt.getTextType();
if (skipPlaceholders && ph != null) {
continue;
}
@@ -229,10 +229,9 @@ public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
@Override
protected List<PackagePart> getMainDocumentParts() throws TikaException {
List<PackagePart> parts = new ArrayList<>();
- XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument();
XSLFSlideShow document = null;
try {
- document = slideShow._getXSLFSlideShow(); // TODO Avoid this in future
+ document = new XSLFSlideShow(extractor.getPackage());
} catch (Exception e) {
throw new TikaException(e.getMessage()); // Shouldn't happen
}
http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
index 35b341d..b1e1241 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
@@ -79,14 +79,14 @@ public class PowerPointParserTest extends TikaTest {
for(int row=1;row<=3;row++) {
//assertContains("·\tBullet " + row, content);
//assertContains("\u00b7\tBullet " + row, content);
- assertContains("<p>Bullet " + row, xml);
+ assertContains("<li>Bullet " + row, xml);
}
assertContains("Here is a numbered list:", xml);
for(int row=1;row<=3;row++) {
//assertContains(row + ")\tNumber bullet " + row, content);
//assertContains(row + ") Number bullet " + row, content);
// TODO: OOXMLExtractor fails to number the bullets:
- assertContains("<p>Number bullet " + row, xml);
+ assertContains("<li>Number bullet " + row, xml);
}
for(int row=1;row<=2;row++) {