You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/02/14 01:07:51 UTC
svn commit: r744308 - in /lucene/tika/trunk: CHANGES.txt
src/main/java/org/apache/tika/metadata/MSOffice.java
src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
Author: jukka
Date: Sat Feb 14 00:07:51 2009
New Revision: 744308
URL: http://svn.apache.org/viewvc?rev=744308&view=rev
Log:
TIKA-186: Refactor the MS Office property names to MSOffice.java
Patch by Andrzej Rusin.
Modified:
lucene/tika/trunk/CHANGES.txt
lucene/tika/trunk/src/main/java/org/apache/tika/metadata/MSOffice.java
lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
Modified: lucene/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/CHANGES.txt?rev=744308&r1=744307&r2=744308&view=diff
==============================================================================
--- lucene/tika/trunk/CHANGES.txt (original)
+++ lucene/tika/trunk/CHANGES.txt Sat Feb 14 00:07:51 2009
@@ -9,6 +9,11 @@
* Tika now supports the Office Open XML format used by
Microsoft Office 2007. (TIKA-152)
+ * All the metadata keys for Microsoft Office document properties are now
+ included as constants in the MSOffice interface. Clients should use
+ these constants instead of the raw string values to refer to specific
+ metadata items. (TIKA-186)
+
* Automatic detection of document types in Tika has been improved.
For example Tika can now detect plain text just by looking at the first
few bytes of the document. (TIKA-154)
Modified: lucene/tika/trunk/src/main/java/org/apache/tika/metadata/MSOffice.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/metadata/MSOffice.java?rev=744308&r1=744307&r2=744308&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/metadata/MSOffice.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/metadata/MSOffice.java Sat Feb 14 00:07:51 2009
@@ -17,9 +17,7 @@
package org.apache.tika.metadata;
/**
- * A collection of <i>"Office"</i> documents properties names.
- *
- *
+ * A collection of Microsoft Office document property names.
*/
public interface MSOffice {
@@ -65,10 +63,18 @@
public static final String APPLICATION_VERSION = "Application-Version";
- public static final String VERSION = "version";
+ public static final String VERSION = "Version";
public static final String CONTENT_STATUS = "Content-Status";
- public static final String CATEGORY = "category";
+ public static final String CATEGORY = "Category";
+
+ public static final String COMPANY = "Company";
+
+ public static final String SECURITY = "Security";
+
+ public static final String EDIT_TIME = "Edit-Time";
+
+ public static final String CREATION_DATE = "Creation-Date";
}
Modified: lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java?rev=744308&r1=744307&r2=744308&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java Sat Feb 14 00:07:51 2009
@@ -46,7 +46,7 @@
*/
public class OfficeParser implements Parser {
- private static final String SUMMARY_INFORMATION =
+ private static final String SUMMARY_INFORMATION =
SummaryInformation.DEFAULT_STREAM_NAME;
private static final String DOCUMENT_SUMMARY_INFORMATION =
@@ -134,19 +134,19 @@
set(metadata, Metadata.TEMPLATE, summary.getTemplate());
set(metadata, Metadata.APPLICATION_NAME, summary.getApplicationName());
set(metadata, Metadata.REVISION_NUMBER, summary.getRevNumber());
- set(metadata, "creationdate", summary.getCreateDateTime());
+ set(metadata, Metadata.CREATION_DATE, summary.getCreateDateTime());
set(metadata, Metadata.CHARACTER_COUNT, summary.getCharCount());
- set(metadata, "edittime", summary.getEditTime());
+ set(metadata, Metadata.EDIT_TIME, summary.getEditTime());
set(metadata, Metadata.LAST_SAVED, summary.getLastSaveDateTime());
set(metadata, Metadata.PAGE_COUNT, summary.getPageCount());
- set(metadata, "security", summary.getSecurity());
+ set(metadata, Metadata.SECURITY, summary.getSecurity());
set(metadata, Metadata.WORD_COUNT, summary.getWordCount());
set(metadata, Metadata.LAST_PRINTED, summary.getLastPrinted());
}
private void parse(DocumentSummaryInformation summary, Metadata metadata) {
- set(metadata, "company", summary.getCompany());
- set(metadata, "manager", summary.getManager());
+ set(metadata, Metadata.COMPANY, summary.getCompany());
+ set(metadata, Metadata.MANAGER, summary.getManager());
}
private void setType(Metadata metadata, String type) {