You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2012/05/17 18:20:10 UTC
svn commit: r1339685 - in
/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata: MSOffice.java
Office.java
Author: nick
Date: Thu May 17 16:20:10 2012
New Revision: 1339685
URL: http://svn.apache.org/viewvc?rev=1339685&view=rev
Log:
TIKA-929 Start to replace the old non-prefixed, largely non-property MSOffice metadata definitions with new style ones
Added:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Office.java
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java?rev=1339685&r1=1339684&r2=1339685&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java Thu May 17 16:20:10 2012
@@ -17,7 +17,10 @@
package org.apache.tika.metadata;
/**
- * A collection of Microsoft Office documents property names.
+ * A collection of Microsoft Office and Open Document property names.
+ *
+ * This is being replaced with cleaner, better defined properties in
+ * {@link Office}.
*/
public interface MSOffice {
Added: tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Office.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Office.java?rev=1339685&view=auto
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Office.java (added)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Office.java Thu May 17 16:20:10 2012
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+/**
+ * Office Document properties collection. These properties apply to
+ * Office / Productivity Documents of all forms, including (but not limited
+ * to) MS Office and OpenDocument formats.
+ * This is a logical collection of properties, which may be drawn from a
+ * few different external definitions.
+ * <p>
+ * Note that some of the legacy properties from the {@link MSOffice}
+ * collection still need to be migrated over.
+ */
+public interface Office {
+ // These are taken from the OpenDocument Format (ODF) specification
+ public static final String NAMESPACE_URI_DOC_META = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
+ public static final String PREFIX_DOC_META = "doc-meta";
+ public static final String PREFIX_DOC_META_STATS = "doc-meta-stats";
+
+ /**
+ * The prefix to attach to the key names of user-defined metadata
+ * entries found in the document.
+ * e.g. {@code <meta:user-defined meta:name="Info1">Text1</meta:user-defined>} becomes {@code custom:Info1=Text1}
+ */
+ public static final String USER_DEFINED_METADATA_NAME_PREFIX = "custom:";
+
+
+ /**
+ * Keywords pertaining to a document.
+ */
+ Property KEYWORDS = Property.internalTextBag(
+ PREFIX_DOC_META + Metadata.PREFIX_DELIMITER + "keyword");
+
+ /**
+ * Name of the initial creator/author of a document.
+ */
+ Property INITIAL_AUTHOR = Property.internalText(
+ PREFIX_DOC_META + Metadata.PREFIX_DELIMITER + "initial-author");
+
+ /**
+ * Name of the last (most recent) author of a document.
+ */
+ Property LAST_AUTHOR = Property.internalText(
+ PREFIX_DOC_META + Metadata.PREFIX_DELIMITER + "last-author");
+
+ /** When was the document created? */
+ Property CREATION_DATE = Property.internalDate(
+ PREFIX_DOC_META + Metadata.PREFIX_DELIMITER + "creation-date");
+
+ /** When was the document last saved? */
+ Property SAVE_DATE = Property.internalDate(
+ PREFIX_DOC_META + Metadata.PREFIX_DELIMITER + "save-date");
+
+ /** When was the document last printed? */
+ Property PRINT_DATE = Property.internalDate(
+ PREFIX_DOC_META + Metadata.PREFIX_DELIMITER + "print-date");
+
+
+
+ /** The number of Slides in the (presentation) document */
+ Property SLIDE_COUNT = Property.internalInteger(
+ PREFIX_DOC_META_STATS + Metadata.PREFIX_DELIMITER + "slide-count");
+
+ /** The number of Pages in the (paged) document */
+ Property PAGE_COUNT = Property.internalInteger(
+ PREFIX_DOC_META_STATS + Metadata.PREFIX_DELIMITER + "page-count");
+
+ /** The number of individual Paragraphs in the document */
+ Property PARAGRAPH_COUNT = Property.internalInteger(
+ PREFIX_DOC_META_STATS + Metadata.PREFIX_DELIMITER + "paragraph-count");
+
+ /** The number of Lines in the document */
+ Property LINE_COUNT = Property.internalInteger(
+ PREFIX_DOC_META_STATS + Metadata.PREFIX_DELIMITER + "line-count");
+
+ /** The number of Words in the document */
+ Property WORD_COUNT = Property.internalInteger(
+ PREFIX_DOC_META_STATS + Metadata.PREFIX_DELIMITER + "word-count");
+
+ /** The number of Characters in the document */
+ Property CHARACTER_COUNT = Property.internalInteger(
+ PREFIX_DOC_META_STATS + Metadata.PREFIX_DELIMITER + "character-count");
+
+ /** The number of Characters in the document, including spaces */
+ Property CHARACTER_COUNT_WITH_SPACES = Property.internalInteger(
+ PREFIX_DOC_META_STATS + Metadata.PREFIX_DELIMITER + "character-count-with-spaces");
+
+ /** The number of Tables in the document */
+ Property TABLE_COUNT = Property.internalInteger(
+ PREFIX_DOC_META_STATS + Metadata.PREFIX_DELIMITER + "table-count");
+
+ /** The number of Images in the document */
+ Property IMAGE_COUNT = Property.internalInteger(
+ PREFIX_DOC_META_STATS + Metadata.PREFIX_DELIMITER + "image-count");
+
+ /**
+ * The number of Objects in the document. These are typically non-Image resources
+ * embedded in the document, such as other documents or non-Image media.
+ */
+ Property OBJECT_COUNT = Property.internalInteger(
+ PREFIX_DOC_META_STATS + Metadata.PREFIX_DELIMITER + "object-count");
+}