You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/03/08 22:39:27 UTC
svn commit: r1454590 - in /manifoldcf/trunk/connectors:
rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/
sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/
wiki/connector/src/main/java/org/a...
Author: kwright
Date: Fri Mar 8 21:39:27 2013
New Revision: 1454590
URL: http://svn.apache.org/r1454590
Log:
Parse ISO8601 date values also as part of RSS fields. Fix for CONNECTORS-622.
Modified:
manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
Modified: manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java?rev=1454590&r1=1454589&r2=1454590&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java (original)
+++ manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java Fri Mar 8 21:39:27 2013
@@ -4003,15 +4003,23 @@ public class RSSConnector extends org.ap
if (linkField != null && linkField.length() > 0)
{
- Long origDate = null;
+ Date origDateDate = null;
if (pubDateField != null && pubDateField.length() > 0)
{
- origDate = parseRSSDate(pubDateField);
+ origDateDate = parseRFC822Date(pubDateField);
// Special for China Daily News
- if (origDate == null)
- origDate = parseChinaDate(pubDateField);
- }
-
+ if (origDateDate == null)
+ origDateDate = parseChinaDate(pubDateField);
+ // Special for LL
+ if (origDateDate == null)
+ origDateDate = parseISO8601Date(pubDateField);
+ }
+ Long origDate;
+ if (origDateDate != null)
+ origDate = new Long(origDateDate.getTime());
+ else
+ origDate = null;
+
String[] links = linkField.split(", ");
int l = 0;
while (l < links.length)
@@ -5263,7 +5271,7 @@ public class RSSConnector extends org.ap
}
/** Parse a China Daily News date */
- protected static Long parseChinaDate(String dateValue)
+ protected static Date parseChinaDate(String dateValue)
{
dateValue = dateValue.trim();
// Format: 2007/12/30 11:01
@@ -5338,7 +5346,7 @@ public class RSSConnector extends org.ap
c.set(Calendar.SECOND,value);
c.set(Calendar.MILLISECOND,0);
- return new Long(c.getTimeInMillis());
+ return new Date(c.getTimeInMillis());
}
catch (NumberFormatException e)
{
@@ -5347,6 +5355,21 @@ public class RSSConnector extends org.ap
}
+ /** Parse an ISO8601 date of the fixed form yyyy-MM-dd'T'HH:mm:ss'Z' (for example 2013-03-08T21:39:27Z).
+ * NOTE(review): the trailing 'Z' is quoted in the pattern, so it is matched as a literal character
+ * rather than as a time zone designator, and no time zone is set on the formatter.  The value is
+ * therefore interpreted in the formatter's default time zone, not UTC - confirm this is intended.
+ * No null check is performed on the argument.
+ *@param isoDateValue is the date string to parse.
+ *@return the parsed date, or null if the string does not match the pattern.
+ */
+ protected static Date parseISO8601Date(String isoDateValue)
+ {
+ // A fresh formatter is created on every call.
+ java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ss'Z'");
+ try
+ {
+ return iso8601Format.parse(isoDateValue);
+ }
+ catch (java.text.ParseException e)
+ {
+ // An unparseable value is reported as null instead of propagating the exception.
+ return null;
+ }
+ }
+
/** Timezone mapping from RFC822 timezones to ones understood by Java */
protected static final HashMap milTzMap;
static
@@ -5360,8 +5383,8 @@ public class RSSConnector extends org.ap
milTzMap.put("Y","GMT+12:00");
}
- /** Parse an RSS date */
- protected static Long parseRSSDate(String dateValue)
+ /** Parse RFC822 date */
+ protected static Date parseRFC822Date(String dateValue)
{
dateValue = dateValue.trim();
// See http://www.faqs.org/rfcs/rfc822.html for legal formats
@@ -5493,7 +5516,7 @@ public class RSSConnector extends org.ap
c.set(Calendar.SECOND,value);
c.set(Calendar.MILLISECOND,0);
- return new Long(c.getTimeInMillis());
+ return new Date(c.getTimeInMillis());
}
catch (NumberFormatException e)
{
Modified: manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1454590&r1=1454589&r2=1454590&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java (original)
+++ manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java Fri Mar 8 21:39:27 2013
@@ -1105,7 +1105,7 @@ public class SharePointRepository extend
{
if (dateTimeValue == null)
return null;
- java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ssZ");
+ java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ss'Z'");
try
{
return iso8601Format.parse(dateTimeValue);
Modified: manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java?rev=1454590&r1=1454589&r2=1454590&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java (original)
+++ manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java Fri Mar 8 21:39:27 2013
@@ -3661,7 +3661,7 @@ public class WikiConnector extends org.a
protected static Date parseISODate(String isoDateValue)
{
- java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ssZ");
+ java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ss'Z'");
try
{
return iso8601Format.parse(isoDateValue);