You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/09/06 15:30:29 UTC

[tika] 02/02: TIKA-2552 -- upgrade to POI 4.0.0 -- fix merge conflicts

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 49ed3099f557349531076f7265db1b453a9627fb
Author: TALLISON <ta...@apache.org>
AuthorDate: Thu Sep 6 11:17:13 2018 -0400

    TIKA-2552 -- upgrade to POI 4.0.0 -- fix merge conflicts
---
 .../parser/microsoft/ooxml/MetadataExtractor.java  | 109 +++++++++++++--------
 1 file changed, 70 insertions(+), 39 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
index 30f2975..e5da8ce 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
@@ -16,17 +16,12 @@
  */
 package org.apache.tika.parser.microsoft.ooxml;
 
-import java.math.BigDecimal;
-import java.util.Date;
-
-import org.apache.poi.POIXMLProperties.CoreProperties;
-import org.apache.poi.POIXMLProperties.CustomProperties;
-import org.apache.poi.POIXMLProperties.ExtendedProperties;
-import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.ooxml.POIXMLProperties;
+import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
-import org.apache.poi.openxml4j.util.Nullable;
 import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.MSOffice;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.OfficeOpenXMLCore;
@@ -42,6 +37,10 @@ import org.apache.xmlbeans.impl.values.XmlValueOutOfRangeException;
 import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
 import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
 
+import java.math.BigDecimal;
+import java.util.Date;
+import java.util.Optional;
+
 /**
  * OOXML metadata extractor.
  * <p/>
@@ -70,7 +69,7 @@ public class MetadataExtractor {
         }
     }
 
-    private void extractMetadata(CoreProperties properties, Metadata metadata) {
+    private void extractMetadata(POIXMLProperties.CoreProperties properties, Metadata metadata) {
         PackagePropertiesPart propsHolder = properties
                 .getUnderlyingProperties();
 
@@ -87,7 +86,7 @@ public class MetadataExtractor {
                 .getIdentifierProperty());
         addProperty(metadata, OfficeOpenXMLCore.SUBJECT,
                 propsHolder.getSubjectProperty());
-        addProperty(metadata, Office.KEYWORDS, propsHolder
+        addProperty(metadata, TikaCoreProperties.KEYWORDS, propsHolder
                 .getKeywordsProperty());
         setProperty(metadata, TikaCoreProperties.LANGUAGE, propsHolder
                 .getLanguageProperty());
@@ -99,13 +98,23 @@ public class MetadataExtractor {
                 .getModifiedProperty());
         setProperty(metadata, OfficeOpenXMLCore.REVISION, propsHolder
                 .getRevisionProperty());
-
+        // TODO: Move to OO subject in Tika 2.0
+        setProperty(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT,
+                propsHolder.getSubjectProperty());
         setProperty(metadata, TikaCoreProperties.TITLE, propsHolder.getTitleProperty());
         setProperty(metadata, OfficeOpenXMLCore.VERSION, propsHolder.getVersionProperty());
 
+        // Legacy Tika-1.0 style stats
+        // TODO Remove these in Tika 2.0
+        setProperty(metadata, Metadata.CATEGORY, propsHolder.getCategoryProperty());
+        setProperty(metadata, Metadata.CONTENT_STATUS, propsHolder
+                .getContentStatusProperty());
+        setProperty(metadata, Metadata.REVISION_NUMBER, propsHolder
+                .getRevisionProperty());
+        setProperty(metadata, Metadata.VERSION, propsHolder.getVersionProperty());
     }
 
-    private void extractMetadata(ExtendedProperties properties,
+    private void extractMetadata(POIXMLProperties.ExtendedProperties properties,
                                  Metadata metadata) {
         CTProperties propsHolder = properties.getUnderlyingProperties();
 
@@ -143,9 +152,26 @@ public class MetadataExtractor {
         setProperty(metadata, Office.WORD_COUNT, propsHolder.getWords());
         setProperty(metadata, Office.CHARACTER_COUNT, propsHolder.getCharacters());
         setProperty(metadata, Office.CHARACTER_COUNT_WITH_SPACES, propsHolder.getCharactersWithSpaces());
+
+        // Legacy Tika-1.0 style stats
+        // TODO Remove these in Tika 2.0
+        setProperty(metadata, Metadata.APPLICATION_NAME, propsHolder.getApplication());
+        setProperty(metadata, Metadata.APPLICATION_VERSION, propsHolder.getAppVersion());
+        setProperty(metadata, Metadata.MANAGER, propsHolder.getManager());
+        setProperty(metadata, Metadata.NOTES, propsHolder.getNotes());
+        setProperty(metadata, Metadata.PRESENTATION_FORMAT, propsHolder.getPresentationFormat());
+        setProperty(metadata, Metadata.TEMPLATE, propsHolder.getTemplate());
+        setProperty(metadata, Metadata.TOTAL_TIME, totalTime);
+        setProperty(metadata, MSOffice.PAGE_COUNT, propsHolder.getPages());
+        setProperty(metadata, MSOffice.SLIDE_COUNT, propsHolder.getSlides());
+        setProperty(metadata, MSOffice.PARAGRAPH_COUNT, propsHolder.getParagraphs());
+        setProperty(metadata, MSOffice.LINE_COUNT, propsHolder.getLines());
+        setProperty(metadata, MSOffice.WORD_COUNT, propsHolder.getWords());
+        setProperty(metadata, MSOffice.CHARACTER_COUNT, propsHolder.getCharacters());
+        setProperty(metadata, MSOffice.CHARACTER_COUNT_WITH_SPACES, propsHolder.getCharactersWithSpaces());
     }
 
-    private void extractMetadata(CustomProperties properties,
+    private void extractMetadata(POIXMLProperties.CustomProperties properties,
                                  Metadata metadata) {
         org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
                 props = properties.getUnderlyingProperties();
@@ -229,35 +255,40 @@ public class MetadataExtractor {
         }
     }
 
-    private <T> void setProperty(Metadata metadata, Property property, Nullable<T> nullableValue) {
-        T value = nullableValue.getValue();
-        if (value != null) {
-            if (value instanceof Date) {
-                metadata.set(property, (Date) value);
-            } else if (value instanceof String) {
-                metadata.set(property, (String) value);
-            } else if (value instanceof Integer) {
-                metadata.set(property, (Integer) value);
-            } else if (value instanceof Double) {
-                metadata.set(property, (Double) value);
-            }
+    private <T> void setProperty(Metadata metadata, Property property, Optional<T> nullableValue) {
+        if (!nullableValue.isPresent()) {
+            return;
+        }
+        T value = nullableValue.get();
+        if (value instanceof Date) {
+            metadata.set(property, (Date) value);
+        } else if (value instanceof String) {
+            metadata.set(property, (String) value);
+        } else if (value instanceof Integer) {
+            metadata.set(property, (Integer) value);
+        } else if (value instanceof Double) {
+            metadata.set(property, (Double) value);
         }
     }
 
-    private <T> void addProperty(Metadata metadata, Property property, Nullable<T> nullableValue) {
-        T value = nullableValue.getValue();
-        if (value != null) {
-            if (value instanceof String) {
-                metadata.add(property, (String) value);
-            } else {
-                throw new IllegalArgumentException("Can't add property of class: "+nullableValue.getClass());
-            }
+    private <T> void addProperty(Metadata metadata, Property property, Optional<T> nullableValue) {
+        if (!nullableValue.isPresent()) {
+            return;
+        }
+        T value = nullableValue.get();
+        if (value instanceof String) {
+            metadata.add(property, (String) value);
+        } else {
+            throw new IllegalArgumentException("Can't add property of class: " + nullableValue.getClass());
         }
     }
-    private void setProperty(Metadata metadata, String name, Nullable<?> value) {
-        if (value.getValue() != null) {
-            setProperty(metadata, name, value.getValue().toString());
+
+    private void setProperty(Metadata metadata, String property, Optional<String> nullableValue) {
+        if (!nullableValue.isPresent()) {
+            return;
         }
+        String value = nullableValue.get();
+        metadata.set(property, value);
     }
 
     private void setProperty(Metadata metadata, Property property, String value) {
@@ -284,11 +315,11 @@ public class MetadataExtractor {
         }
     }
 
-    private void addMultiProperty(Metadata metadata, Property property, Nullable<String> value) {
-        if (value == null) {
+    private void addMultiProperty(Metadata metadata, Property property, Optional<String> value) {
+        if (!value.isPresent()) {
             return;
         }
-        SummaryExtractor.addMulti(metadata, property, value.getValue());
+        SummaryExtractor.addMulti(metadata, property, value.get());
     }
 
 }