You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2014/06/09 15:56:00 UTC

svn commit: r1601385 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/metadata/ tika-core/src/main/java/org/apache/tika/utils/ tika-core/src/test/java/org/apache/tika/metadata/ tika-parsers/src/main/java/org/apache/tika/parser/font/

Author: nick
Date: Mon Jun  9 13:55:59 2014
New Revision: 1601385

URL: http://svn.apache.org/r1601385
Log:
Provide explicit DateUtils support for formatting Dates in an unknown timezone, matching what TestMetadata checks in detail, and allow for setting a Metadata date value from a Calendar. Finally, use this for the TTF dates, to hopefully solve the TIKA-1325 test problem

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java?rev=1601385&r1=1601384&r2=1601385&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java Mon Jun  9 13:55:59 2014
@@ -25,6 +25,7 @@ import java.text.DateFormat;
 import java.text.DateFormatSymbols;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.util.Calendar;
 import java.util.Date;
 import java.util.Enumeration;
 import java.util.HashMap;
@@ -474,6 +475,27 @@ public class Metadata implements Creativ
     }
 
     /**
+     * Sets the date value of the identified metadata property.
+     *
+     * @since Apache Tika 0.8
+     * @param property simple date property definition
+     * @param date     property value
+     */
+    public void set(Property property, Calendar date) {
+        Property primary = property.getPrimaryProperty();
+        // Reject anything whose primary property is not a SIMPLE / DATE one
+        if (primary.getPropertyType() != Property.PropertyType.SIMPLE) {
+            throw new PropertyTypeException(Property.PropertyType.SIMPLE, primary.getPropertyType());
+        }
+        if (primary.getValueType() != Property.ValueType.DATE) {
+            throw new PropertyTypeException(Property.ValueType.DATE, primary.getValueType());
+        }
+        // A null Calendar is handed on as a null string value
+        String dateString = (date == null) ? null : formatDate(date);
+        set(property, dateString);
+    }
+
+    /**
      * Remove a metadata and all its associated values.
      * 
      * @param name

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java?rev=1601385&r1=1601384&r2=1601385&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/utils/DateUtils.java Mon Jun  9 13:55:59 2014
@@ -50,11 +50,44 @@ public class DateUtils {
      *
      * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
      * @param date given date
-     * @return ISO 8601 date string
+     * @return ISO 8601 date string, including timezone details
      */
     public static String formatDate(Date date) {
         Calendar calendar = GregorianCalendar.getInstance(UTC, Locale.US);
         calendar.setTime(date);
+        return doFormatDate(calendar);
+    }
+    /**
+     * Returns an ISO 8601 representation of the given calendar, in UTC.
+     * A copy is converted, so the caller's Calendar keeps its timezone.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
+     * @param date given date, as a Calendar in any timezone
+     * @return ISO 8601 date string, including timezone details
+     */
+    public static String formatDate(Calendar date) {
+        Calendar utcCopy = (Calendar) date.clone();
+        utcCopy.setTimeZone(UTC);
+        return doFormatDate(utcCopy);
+    }
+    /**
+     * Returns an ISO 8601 representation of the given date, which is
+     *  in an unknown timezone. This method is thread safe and non-blocking.
+     *  The UTC field values are used, with the zone designator removed.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
+     * @param date given date
+     * @return ISO 8601 date string, without timezone details
+     */
+    public static String formatDateUnknownTimezone(Date date) {
+        // No local Calendar is needed: formatDate(Date) builds its own UTC
+        // calendar and returns a string ending in the 'Z' zone designator,
+        // e.g. 1970-01-01T00:00:01Z
+        String formatted = formatDate(date);
+        // Strip the timezone details before returning
+        return formatted.substring(0, formatted.length()-1);
+    }
+    private static String doFormatDate(Calendar calendar) {
         return String.format(
                 "%04d-%02d-%02dT%02d:%02d:%02dZ",
                 calendar.get(Calendar.YEAR),

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java?rev=1601385&r1=1601384&r2=1601385&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java Mon Jun  9 13:55:59 2014
@@ -20,9 +20,10 @@ package org.apache.tika.metadata;
 import java.util.Date;
 import java.util.Properties;
 
-
+import org.apache.tika.utils.DateUtils;
 import org.junit.Test;
 
+
 //Junit imports
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -337,9 +338,15 @@ public class TestMetadata {
     public void testGetSetDateUnspecifiedTimezone() {
         Metadata meta = new Metadata();    
         
+        // Set explicitly without a timezone
         meta.set(TikaCoreProperties.CREATED, "1970-01-01T00:00:01");
         assertEquals("should return string without time zone specifier because zone is not known",
         		"1970-01-01T00:00:01", meta.get(TikaCoreProperties.CREATED));
+        
+        // Now ask DateUtils to format for us without one
+        meta.set(TikaCoreProperties.CREATED, DateUtils.formatDateUnknownTimezone(new Date(1000)));
+        assertEquals("should return string without time zone specifier because zone is not known",
+                         "1970-01-01T00:00:01", meta.get(TikaCoreProperties.CREATED));
     }
     
     /**

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java?rev=1601385&r1=1601384&r2=1601385&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java Mon Jun  9 13:55:59 2014
@@ -72,9 +72,9 @@ public class TrueTypeParser extends Abst
         // Report the details of the font
         metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
         metadata.set(TikaCoreProperties.CREATED, 
-                font.getHeader().getCreated().getTime());
+                font.getHeader().getCreated());
         metadata.set(TikaCoreProperties.MODIFIED,
-                font.getHeader().getModified().getTime());
+                font.getHeader().getModified());
         metadata.set(AdobeFontMetricParser.MET_DOC_VERSION,
                 Float.toString(font.getHeader().getVersion()));