You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/09/06 15:30:29 UTC
[tika] 02/02: TIKA-2552 -- upgrade to POI 4.0.0 -- fix merge conflicts
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 49ed3099f557349531076f7265db1b453a9627fb
Author: TALLISON <ta...@apache.org>
AuthorDate: Thu Sep 6 11:17:13 2018 -0400
TIKA-2552 -- upgrade to POI 4.0.0 -- fix merge conflicts
---
.../parser/microsoft/ooxml/MetadataExtractor.java | 109 +++++++++++++--------
1 file changed, 70 insertions(+), 39 deletions(-)
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
index 30f2975..e5da8ce 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
@@ -16,17 +16,12 @@
*/
package org.apache.tika.parser.microsoft.ooxml;
-import java.math.BigDecimal;
-import java.util.Date;
-
-import org.apache.poi.POIXMLProperties.CoreProperties;
-import org.apache.poi.POIXMLProperties.CustomProperties;
-import org.apache.poi.POIXMLProperties.ExtendedProperties;
-import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.ooxml.POIXMLProperties;
+import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
-import org.apache.poi.openxml4j.util.Nullable;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
@@ -42,6 +37,10 @@ import org.apache.xmlbeans.impl.values.XmlValueOutOfRangeException;
import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
+import java.math.BigDecimal;
+import java.util.Date;
+import java.util.Optional;
+
/**
* OOXML metadata extractor.
* <p/>
@@ -70,7 +69,7 @@ public class MetadataExtractor {
}
}
- private void extractMetadata(CoreProperties properties, Metadata metadata) {
+ private void extractMetadata(POIXMLProperties.CoreProperties properties, Metadata metadata) {
PackagePropertiesPart propsHolder = properties
.getUnderlyingProperties();
@@ -87,7 +86,7 @@ public class MetadataExtractor {
.getIdentifierProperty());
addProperty(metadata, OfficeOpenXMLCore.SUBJECT,
propsHolder.getSubjectProperty());
- addProperty(metadata, Office.KEYWORDS, propsHolder
+ addProperty(metadata, TikaCoreProperties.KEYWORDS, propsHolder
.getKeywordsProperty());
setProperty(metadata, TikaCoreProperties.LANGUAGE, propsHolder
.getLanguageProperty());
@@ -99,13 +98,23 @@ public class MetadataExtractor {
.getModifiedProperty());
setProperty(metadata, OfficeOpenXMLCore.REVISION, propsHolder
.getRevisionProperty());
-
+ // TODO: Move to OO subject in Tika 2.0
+ setProperty(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT,
+ propsHolder.getSubjectProperty());
setProperty(metadata, TikaCoreProperties.TITLE, propsHolder.getTitleProperty());
setProperty(metadata, OfficeOpenXMLCore.VERSION, propsHolder.getVersionProperty());
+ // Legacy Tika-1.0 style stats
+ // TODO Remove these in Tika 2.0
+ setProperty(metadata, Metadata.CATEGORY, propsHolder.getCategoryProperty());
+ setProperty(metadata, Metadata.CONTENT_STATUS, propsHolder
+ .getContentStatusProperty());
+ setProperty(metadata, Metadata.REVISION_NUMBER, propsHolder
+ .getRevisionProperty());
+ setProperty(metadata, Metadata.VERSION, propsHolder.getVersionProperty());
}
- private void extractMetadata(ExtendedProperties properties,
+ private void extractMetadata(POIXMLProperties.ExtendedProperties properties,
Metadata metadata) {
CTProperties propsHolder = properties.getUnderlyingProperties();
@@ -143,9 +152,26 @@ public class MetadataExtractor {
setProperty(metadata, Office.WORD_COUNT, propsHolder.getWords());
setProperty(metadata, Office.CHARACTER_COUNT, propsHolder.getCharacters());
setProperty(metadata, Office.CHARACTER_COUNT_WITH_SPACES, propsHolder.getCharactersWithSpaces());
+
+ // Legacy Tika-1.0 style stats
+ // TODO Remove these in Tika 2.0
+ setProperty(metadata, Metadata.APPLICATION_NAME, propsHolder.getApplication());
+ setProperty(metadata, Metadata.APPLICATION_VERSION, propsHolder.getAppVersion());
+ setProperty(metadata, Metadata.MANAGER, propsHolder.getManager());
+ setProperty(metadata, Metadata.NOTES, propsHolder.getNotes());
+ setProperty(metadata, Metadata.PRESENTATION_FORMAT, propsHolder.getPresentationFormat());
+ setProperty(metadata, Metadata.TEMPLATE, propsHolder.getTemplate());
+ setProperty(metadata, Metadata.TOTAL_TIME, totalTime);
+ setProperty(metadata, MSOffice.PAGE_COUNT, propsHolder.getPages());
+ setProperty(metadata, MSOffice.SLIDE_COUNT, propsHolder.getSlides());
+ setProperty(metadata, MSOffice.PARAGRAPH_COUNT, propsHolder.getParagraphs());
+ setProperty(metadata, MSOffice.LINE_COUNT, propsHolder.getLines());
+ setProperty(metadata, MSOffice.WORD_COUNT, propsHolder.getWords());
+ setProperty(metadata, MSOffice.CHARACTER_COUNT, propsHolder.getCharacters());
+ setProperty(metadata, MSOffice.CHARACTER_COUNT_WITH_SPACES, propsHolder.getCharactersWithSpaces());
}
- private void extractMetadata(CustomProperties properties,
+ private void extractMetadata(POIXMLProperties.CustomProperties properties,
Metadata metadata) {
org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
props = properties.getUnderlyingProperties();
@@ -229,35 +255,40 @@ public class MetadataExtractor {
}
}
- private <T> void setProperty(Metadata metadata, Property property, Nullable<T> nullableValue) {
- T value = nullableValue.getValue();
- if (value != null) {
- if (value instanceof Date) {
- metadata.set(property, (Date) value);
- } else if (value instanceof String) {
- metadata.set(property, (String) value);
- } else if (value instanceof Integer) {
- metadata.set(property, (Integer) value);
- } else if (value instanceof Double) {
- metadata.set(property, (Double) value);
- }
+ private <T> void setProperty(Metadata metadata, Property property, Optional<T> nullableValue) {
+ if (!nullableValue.isPresent()) {
+ return;
+ }
+ T value = nullableValue.get();
+ if (value instanceof Date) {
+ metadata.set(property, (Date) value);
+ } else if (value instanceof String) {
+ metadata.set(property, (String) value);
+ } else if (value instanceof Integer) {
+ metadata.set(property, (Integer) value);
+ } else if (value instanceof Double) {
+ metadata.set(property, (Double) value);
}
}
- private <T> void addProperty(Metadata metadata, Property property, Nullable<T> nullableValue) {
- T value = nullableValue.getValue();
- if (value != null) {
- if (value instanceof String) {
- metadata.add(property, (String) value);
- } else {
- throw new IllegalArgumentException("Can't add property of class: "+nullableValue.getClass());
- }
+ private <T> void addProperty(Metadata metadata, Property property, Optional<T> nullableValue) {
+ if (!nullableValue.isPresent()) {
+ return;
+ }
+ T value = nullableValue.get();
+ if (value instanceof String) {
+ metadata.add(property, (String) value);
+ } else {
+ throw new IllegalArgumentException("Can't add property of class: " + nullableValue.getClass());
}
}
- private void setProperty(Metadata metadata, String name, Nullable<?> value) {
- if (value.getValue() != null) {
- setProperty(metadata, name, value.getValue().toString());
+
+ private void setProperty(Metadata metadata, String property, Optional<String> nullableValue) {
+ if (!nullableValue.isPresent()) {
+ return;
}
+ String value = nullableValue.get();
+ metadata.set(property, value);
}
private void setProperty(Metadata metadata, Property property, String value) {
@@ -284,11 +315,11 @@ public class MetadataExtractor {
}
}
- private void addMultiProperty(Metadata metadata, Property property, Nullable<String> value) {
- if (value == null) {
+ private void addMultiProperty(Metadata metadata, Property property, Optional<String> value) {
+ if (!value.isPresent()) {
return;
}
- SummaryExtractor.addMulti(metadata, property, value.getValue());
+ SummaryExtractor.addMulti(metadata, property, value.get());
}
}