You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/03/12 02:12:51 UTC

svn commit: r1455382 - in /manifoldcf/trunk/connectors/rss/connector/src: main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java

Author: kwright
Date: Tue Mar 12 01:12:51 2013
New Revision: 1455382

URL: http://svn.apache.org/r1455382
Log:
Simplify ISO 8601 date parser; add more tests for it.

Modified:
    manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
    manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java

Modified: manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java?rev=1455382&r1=1455381&r2=1455382&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java (original)
+++ manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java Tue Mar 12 01:12:51 2013
@@ -5168,108 +5168,6 @@ public class RSSConnector extends org.ap
     }
   }
 
-  /** Parse an ISO8601 date */
-  protected static Date parseISO8601Date(String dateValue)
-  {
-    dateValue = dateValue.trim();
-    // Format: YYYY-MM-DDTHH:MM:SSZ
-    // 2007-11-11T05:00:00Z
-    int index = dateValue.indexOf("-");
-    if (index == -1)
-      return null;
-    String year = dateValue.substring(0,index);
-    dateValue = dateValue.substring(index+1);
-    index = dateValue.indexOf("-");
-    if (index == -1)
-      return null;
-    String month = dateValue.substring(0,index);
-    dateValue = dateValue.substring(index+1);
-    index = dateValue.indexOf("T");
-    String day;
-    String hour = "0";
-    String minute = "0";
-    String second = "0";
-    String timezone = "GMT";
-    if (index != -1)
-    {
-      day = dateValue.substring(0,index);
-      dateValue = dateValue.substring(index+1);
-      index = dateValue.indexOf(":");
-      if (index == -1)
-        return null;
-      hour = dateValue.substring(0,index);
-      dateValue = dateValue.substring(index+1);
-      index = dateValue.indexOf(":");
-      if (index != -1)
-      {
-        minute = dateValue.substring(0,index);
-        dateValue = dateValue.substring(index+1);
-        if (dateValue.endsWith("Z"))
-        {
-          index = dateValue.indexOf("Z");
-          if (index == -1)
-            return null;
-        }
-        else
-        {
-          index = dateValue.indexOf("+");
-          if (index == -1)
-            index = dateValue.indexOf("-");
-          if (index == -1)
-            return null;
-          timezone = "GMT"+dateValue.substring(index);
-        }
-        second = dateValue.substring(0,index);
-      }
-      else
-      {
-        minute = dateValue;
-      }
-    }
-    else
-    {
-      day = dateValue;
-    }
-
-    // Now construct a calendar object from this
-    TimeZone tz = TimeZone.getTimeZone(timezone);
-
-    Calendar c = new GregorianCalendar(tz);
-    try
-    {
-      int value = Integer.parseInt(year);
-      c.set(Calendar.YEAR,value);
-
-      value = Integer.parseInt(month);
-      c.set(Calendar.MONTH,value-1);
-
-      value = Integer.parseInt(day);
-      c.set(Calendar.DAY_OF_MONTH,value);
-
-      value = Integer.parseInt(hour);
-      c.set(Calendar.HOUR_OF_DAY,value);
-
-      value = Integer.parseInt(minute);
-      c.set(Calendar.MINUTE,value);
-
-      int index2 = second.indexOf(".");
-      if (index2 != -1)
-        second = second.substring(0,index2);
-
-      value = Integer.parseInt(second);
-      c.set(Calendar.SECOND,value);
-
-      c.set(Calendar.MILLISECOND,0);
-      return new Date(c.getTimeInMillis());
-    }
-    catch (NumberFormatException e)
-    {
-      return null;
-    }
-
-
-  }
-
   /** Parse a China Daily News date */
   protected static Date parseChinaDate(String dateValue)
   {
@@ -5355,20 +5253,34 @@ public class RSSConnector extends org.ap
 
   }
 
-  /**
+  /** Parse ISO 8601 dates, and their common variants.
+  */
   protected static Date parseISO8601Date(String isoDateValue)
   {
-    java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ss'Z'");
+    // There are a number of variations on the basic format.
+    // We'll look for key characters to help us determine which is which.
+    StringBuilder isoFormatString = new StringBuilder("yy");
+    if (isoDateValue.length() > 2 && isoDateValue.charAt(2) != '-')
+      isoFormatString.append("yy");
+    isoFormatString.append("-MM-dd'T'HH:mm:ss");
+    if (isoDateValue.indexOf(".") != -1)
+      isoFormatString.append(".SSS");
+    if (isoDateValue.endsWith("Z"))
+      isoFormatString.append("'Z'");
+    else
+      isoFormatString.append("Z");      // RFC 822 time, including general time zones
+    java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat(isoFormatString.toString());
     try
     {
       return iso8601Format.parse(isoDateValue);
     }
     catch (java.text.ParseException e)
     {
+      System.out.println("Date value: '"+isoDateValue+"'");
+      e.printStackTrace();
       return null;
     }
   }
-  */
   
   /** Timezone mapping from RFC822 timezones to ones understood by Java */
   

Modified: manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java?rev=1455382&r1=1455381&r2=1455382&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java (original)
+++ manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java Tue Mar 12 01:12:51 2013
@@ -30,11 +30,15 @@ public class DateTest extends RSSConnect
   public void iso8601()
     throws Exception
   {
-    Date d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33.344Z");
+    Date d = RSSConnector.parseISO8601Date("96-11-15T01:32:33.344GMT");
+    assertNotNull(d);
+    d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33.344Z");
     assertNotNull(d);
     d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33Z");
     assertNotNull(d);
-    d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33+01");
+    d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33+0100");
+    assertNotNull(d);
+    d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33GMT-03:00");
     assertNotNull(d);
   }