You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/02/02 21:20:02 UTC

tika git commit: TIKA-1799: upgrade to POI 3.14-beta1 for 2.x branch

Repository: tika
Updated Branches:
  refs/heads/2.x 9fb7fa9ab -> de9e28cfa


TIKA-1799: upgrade to POI 3.14-beta1 for 2.x branch


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/de9e28cf
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/de9e28cf
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/de9e28cf

Branch: refs/heads/2.x
Commit: de9e28cfadef2b8707807175a8f0cae1cc8dedff
Parents: 9fb7fa9
Author: tballison <ta...@mitre.org>
Authored: Tue Feb 2 15:19:54 2016 -0500
Committer: tballison <ta...@mitre.org>
Committed: Tue Feb 2 15:19:54 2016 -0500

----------------------------------------------------------------------
 CHANGES.txt                                                 | 2 ++
 tika-bundle/pom.xml                                         | 5 +++--
 tika-parser-modules/pom.xml                                 | 2 +-
 .../microsoft/ooxml/XSLFPowerPointExtractorDecorator.java   | 9 ++++-----
 .../apache/tika/parser/microsoft/PowerPointParserTest.java  | 4 ++--
 5 files changed, 12 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index e452ce0..b2e88e2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -10,6 +10,8 @@ Release 2.0 - Future Development
 
 Release 1.13 - ???
 
+  * Upgrade to POI 3.14-beta1 (TIKA-1799).
+
   * Upgrade to PDFBox 1.8.11 (TIKA-1830).
 
 Release 1.12 - 01/24/2016

http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/tika-bundle/pom.xml
----------------------------------------------------------------------
diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml
index 05cc1d6..f9aa71b 100644
--- a/tika-bundle/pom.xml
+++ b/tika-bundle/pom.xml
@@ -129,6 +129,7 @@
               commons-io, commons-exec, junrar,
               pdfbox,fontbox,jempbox,bcmail-jdk15on,bcprov-jdk15on,bcpkix-jdk15on,
               poi,poi-scratchpad,poi-ooxml,poi-ooxml-schemas,
+              curvesapi,
               xmlbeans,
               jackcess,
               commons-lang,
@@ -264,8 +265,8 @@
               org.xml.sax.ext;resolution:=optional,
               org.xml.sax.helpers;resolution:=optional,
               org.xmlpull.v1;resolution:=optional,
-              schemasMicrosoftComOfficePowerpoint;resolution:=optional,
-              schemasMicrosoftComOfficeWord;resolution:=optional,
+              com.microsoft.schemas.office.powerpoint;resolution:=optional,
+              com.microsoft.schemas.office.word;resolution:=optional,
               sun.misc;resolution:=optional,
               ucar.units;resolution:=optional,
               ucar.httpservices;resolution:=optional,

http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/tika-parser-modules/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml
index 196ce58..932a058 100644
--- a/tika-parser-modules/pom.xml
+++ b/tika-parser-modules/pom.xml
@@ -35,7 +35,7 @@
   <url>http://tika.apache.org/</url>
   
   <properties>
-    <poi.version>3.13</poi.version>
+    <poi.version>3.14-beta1</poi.version>
     <!-- NOTE: sync codec version with POI -->
     <codec.version>1.9</codec.version>
     <pdfbox.version>1.8.11</pdfbox.version>

http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
index 53cb6c1..71469ce 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
@@ -27,9 +27,8 @@ import org.apache.poi.openxml4j.opc.PackagePartName;
 import org.apache.poi.openxml4j.opc.PackageRelationship;
 import org.apache.poi.openxml4j.opc.PackagingURIHelper;
 import org.apache.poi.openxml4j.opc.TargetMode;
-import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.sl.usermodel.SimpleShape;
 import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
-import org.apache.poi.xslf.usermodel.Placeholder;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xslf.usermodel.XSLFCommentAuthors;
 import org.apache.poi.xslf.usermodel.XSLFComments;
@@ -43,6 +42,7 @@ import org.apache.poi.xslf.usermodel.XSLFShape;
 import org.apache.poi.xslf.usermodel.XSLFSheet;
 import org.apache.poi.xslf.usermodel.XSLFSlide;
 import org.apache.poi.xslf.usermodel.XSLFSlideLayout;
+import org.apache.poi.xslf.usermodel.XSLFSlideShow;
 import org.apache.poi.xslf.usermodel.XSLFTable;
 import org.apache.poi.xslf.usermodel.XSLFTableCell;
 import org.apache.poi.xslf.usermodel.XSLFTableRow;
@@ -151,7 +151,7 @@ public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
         for (XSLFShape sh : shapes) {
             if (sh instanceof XSLFTextShape) {
                 XSLFTextShape txt = (XSLFTextShape) sh;
-                Placeholder ph = txt.getTextType();
+                SimpleShape.Placeholder ph = txt.getTextType();
                 if (skipPlaceholders && ph != null) {
                     continue;
                 }
@@ -229,10 +229,9 @@ public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
     @Override
     protected List<PackagePart> getMainDocumentParts() throws TikaException {
         List<PackagePart> parts = new ArrayList<>();
-        XMLSlideShow slideShow = (XMLSlideShow) extractor.getDocument();
         XSLFSlideShow document = null;
         try {
-            document = slideShow._getXSLFSlideShow(); // TODO Avoid this in future
+            document = new XSLFSlideShow(extractor.getPackage());
         } catch (Exception e) {
             throw new TikaException(e.getMessage()); // Shouldn't happen
         }

http://git-wip-us.apache.org/repos/asf/tika/blob/de9e28cf/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
index 35b341d..b1e1241 100644
--- a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
+++ b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
@@ -79,14 +79,14 @@ public class PowerPointParserTest extends TikaTest {
         for(int row=1;row<=3;row++) {
             //assertContains("·\tBullet " + row, content);
             //assertContains("\u00b7\tBullet " + row, content);
-            assertContains("<p>Bullet " + row, xml);
+            assertContains("<li>Bullet " + row, xml);
         }
         assertContains("Here is a numbered list:", xml);
         for(int row=1;row<=3;row++) {
             //assertContains(row + ")\tNumber bullet " + row, content);
             //assertContains(row + ") Number bullet " + row, content);
             // TODO: OOXMLExtractor fails to number the bullets:
-            assertContains("<p>Number bullet " + row, xml);
+            assertContains("<li>Number bullet " + row, xml);
         }
 
         for(int row=1;row<=2;row++) {