You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/02/14 01:07:51 UTC

svn commit: r744308 - in /lucene/tika/trunk: CHANGES.txt src/main/java/org/apache/tika/metadata/MSOffice.java src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java

Author: jukka
Date: Sat Feb 14 00:07:51 2009
New Revision: 744308

URL: http://svn.apache.org/viewvc?rev=744308&view=rev
Log:
TIKA-186: Refactor the MS Office property names to MSOffice.java

Patch by Andrzej Rusin.

Modified:
    lucene/tika/trunk/CHANGES.txt
    lucene/tika/trunk/src/main/java/org/apache/tika/metadata/MSOffice.java
    lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java

Modified: lucene/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/CHANGES.txt?rev=744308&r1=744307&r2=744308&view=diff
==============================================================================
--- lucene/tika/trunk/CHANGES.txt (original)
+++ lucene/tika/trunk/CHANGES.txt Sat Feb 14 00:07:51 2009
@@ -9,6 +9,11 @@
   * Tika now supports the Office Open XML format used by
     Microsoft Office 2007. (TIKA-152)
 
+  * All the metadata keys for Microsoft Office document properties are now
+    included as constants in the MSOffice interface. Clients should use
+    these constants instead of the raw string values to refer to specific
+    metadata items. (TIKA-186)
+
   * Automatic detection of document types in Tika has been improved.
     For example Tika can now detect plain text just by looking at the first
     few bytes of the document. (TIKA-154)

Modified: lucene/tika/trunk/src/main/java/org/apache/tika/metadata/MSOffice.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/metadata/MSOffice.java?rev=744308&r1=744307&r2=744308&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/metadata/MSOffice.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/metadata/MSOffice.java Sat Feb 14 00:07:51 2009
@@ -17,9 +17,7 @@
 package org.apache.tika.metadata;
 
 /**
- * A collection of <i>"Office"</i> documents properties names.
- * 
- * 
+ * A collection of Microsoft Office document property names.
  */
 public interface MSOffice {
 
@@ -65,10 +63,18 @@
   
   public static final String APPLICATION_VERSION = "Application-Version";
   
-  public static final String VERSION = "version";
+  public static final String VERSION = "Version";
   
   public static final String CONTENT_STATUS = "Content-Status";
   
-  public static final String CATEGORY = "category";
+  public static final String CATEGORY = "Category";
+
+  public static final String COMPANY = "Company";
+
+  public static final String SECURITY = "Security";
+
+  public static final String EDIT_TIME = "Edit-Time";
+
+  public static final String CREATION_DATE = "Creation-Date";
 
 }

Modified: lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java?rev=744308&r1=744307&r2=744308&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java Sat Feb 14 00:07:51 2009
@@ -46,7 +46,7 @@
  */
 public class OfficeParser implements Parser {
 
-    private static final String SUMMARY_INFORMATION =
+	private static final String SUMMARY_INFORMATION =
         SummaryInformation.DEFAULT_STREAM_NAME;
 
     private static final String DOCUMENT_SUMMARY_INFORMATION =
@@ -134,19 +134,19 @@
         set(metadata, Metadata.TEMPLATE, summary.getTemplate());
         set(metadata, Metadata.APPLICATION_NAME, summary.getApplicationName());
         set(metadata, Metadata.REVISION_NUMBER, summary.getRevNumber());
-        set(metadata, "creationdate", summary.getCreateDateTime());
+        set(metadata, Metadata.CREATION_DATE, summary.getCreateDateTime());
         set(metadata, Metadata.CHARACTER_COUNT, summary.getCharCount());
-        set(metadata, "edittime", summary.getEditTime());
+        set(metadata, Metadata.EDIT_TIME, summary.getEditTime());
         set(metadata, Metadata.LAST_SAVED, summary.getLastSaveDateTime());
         set(metadata, Metadata.PAGE_COUNT, summary.getPageCount());
-        set(metadata, "security", summary.getSecurity());
+        set(metadata, Metadata.SECURITY, summary.getSecurity());
         set(metadata, Metadata.WORD_COUNT, summary.getWordCount());
         set(metadata, Metadata.LAST_PRINTED, summary.getLastPrinted());
     }
 
     private void parse(DocumentSummaryInformation summary, Metadata metadata) {
-        set(metadata, "company", summary.getCompany());
-        set(metadata, "manager", summary.getManager());
+        set(metadata, Metadata.COMPANY, summary.getCompany());
+        set(metadata, Metadata.MANAGER, summary.getManager());
     }
 
     private void setType(Metadata metadata, String type) {