You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/03/08 22:39:27 UTC

svn commit: r1454590 - in /manifoldcf/trunk/connectors: rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/ wiki/connector/src/main/java/org/a...

Author: kwright
Date: Fri Mar  8 21:39:27 2013
New Revision: 1454590

URL: http://svn.apache.org/r1454590
Log:
Also parse ISO8601 date values found in RSS date fields.  Fix for CONNECTORS-622.

Modified:
    manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
    manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
    manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java

Modified: manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java?rev=1454590&r1=1454589&r2=1454590&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java (original)
+++ manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java Fri Mar  8 21:39:27 2013
@@ -4003,15 +4003,23 @@ public class RSSConnector extends org.ap
 
       if (linkField != null && linkField.length() > 0)
       {
-        Long origDate = null;
+        Date origDateDate = null;
         if (pubDateField != null && pubDateField.length() > 0)
         {
-          origDate = parseRSSDate(pubDateField);
+          origDateDate = parseRFC822Date(pubDateField);
           // Special for China Daily News
-          if (origDate == null)
-            origDate = parseChinaDate(pubDateField);
-        }
-
+          if (origDateDate == null)
+            origDateDate = parseChinaDate(pubDateField);
+          // Special for LL
+          if (origDateDate == null)
+            origDateDate = parseISO8601Date(pubDateField);
+        }
+        Long origDate;
+        if (origDateDate != null)
+          origDate = new Long(origDateDate.getTime());
+        else
+          origDate = null;
+        
         String[] links = linkField.split(", ");
         int l = 0;
         while (l < links.length)
@@ -5263,7 +5271,7 @@ public class RSSConnector extends org.ap
   }
 
   /** Parse a China Daily News date */
-  protected static Long parseChinaDate(String dateValue)
+  protected static Date parseChinaDate(String dateValue)
   {
     dateValue = dateValue.trim();
     // Format: 2007/12/30 11:01
@@ -5338,7 +5346,7 @@ public class RSSConnector extends org.ap
       c.set(Calendar.SECOND,value);
 
       c.set(Calendar.MILLISECOND,0);
-      return new Long(c.getTimeInMillis());
+      return new Date(c.getTimeInMillis());
     }
     catch (NumberFormatException e)
     {
@@ -5347,6 +5355,21 @@ public class RSSConnector extends org.ap
 
   }
 
+  /** Parse an ISO8601 date of the form yyyy-MM-dd'T'HH:mm:ss'Z'; returns the parsed Date, or null if isoDateValue cannot be parsed.
+  * NOTE(review): the trailing Z is matched as a quoted literal and no time zone is set on the format here -- confirm which zone is intended. */
+  protected static Date parseISO8601Date(String isoDateValue)
+  {
+    java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ss'Z'");
+    try
+    {
+      return iso8601Format.parse(isoDateValue);
+    }
+    catch (java.text.ParseException e)
+    {
+      return null;
+    }
+  }
+
   /** Timezone mapping from RFC822 timezones to ones understood by Java */
   protected static final HashMap milTzMap;
   static
@@ -5360,8 +5383,8 @@ public class RSSConnector extends org.ap
     milTzMap.put("Y","GMT+12:00");
   }
 
-  /** Parse an RSS date */
-  protected static Long parseRSSDate(String dateValue)
+  /** Parse RFC822 date */
+  protected static Date parseRFC822Date(String dateValue)
   {
     dateValue = dateValue.trim();
     // See http://www.faqs.org/rfcs/rfc822.html for legal formats
@@ -5493,7 +5516,7 @@ public class RSSConnector extends org.ap
       c.set(Calendar.SECOND,value);
 
       c.set(Calendar.MILLISECOND,0);
-      return new Long(c.getTimeInMillis());
+      return new Date(c.getTimeInMillis());
     }
     catch (NumberFormatException e)
     {

Modified: manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1454590&r1=1454589&r2=1454590&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java (original)
+++ manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java Fri Mar  8 21:39:27 2013
@@ -1105,7 +1105,7 @@ public class SharePointRepository extend
   {
     if (dateTimeValue == null)
       return null;
-    java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ssZ");
+    java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ss'Z'");
     try
     {
       return iso8601Format.parse(dateTimeValue);

Modified: manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java?rev=1454590&r1=1454589&r2=1454590&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java (original)
+++ manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java Fri Mar  8 21:39:27 2013
@@ -3661,7 +3661,7 @@ public class WikiConnector extends org.a
   
   protected static Date parseISODate(String isoDateValue)
   {
-    java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ssZ");
+    java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ss'Z'");
     try
     {
       return iso8601Format.parse(isoDateValue);