You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/03/12 02:12:51 UTC
svn commit: r1455382 - in /manifoldcf/trunk/connectors/rss/connector/src:
main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java
Author: kwright
Date: Tue Mar 12 01:12:51 2013
New Revision: 1455382
URL: http://svn.apache.org/r1455382
Log:
Simplify ISO 8601 date parser; add more tests for it.
Modified:
manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java
Modified: manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java?rev=1455382&r1=1455381&r2=1455382&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java (original)
+++ manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java Tue Mar 12 01:12:51 2013
@@ -5168,108 +5168,6 @@ public class RSSConnector extends org.ap
}
}
- /** Parse an ISO8601 date */
- protected static Date parseISO8601Date(String dateValue)
- {
- dateValue = dateValue.trim();
- // Format: YYYY-MM-DDTHH:MM:SSZ
- // 2007-11-11T05:00:00Z
- int index = dateValue.indexOf("-");
- if (index == -1)
- return null;
- String year = dateValue.substring(0,index);
- dateValue = dateValue.substring(index+1);
- index = dateValue.indexOf("-");
- if (index == -1)
- return null;
- String month = dateValue.substring(0,index);
- dateValue = dateValue.substring(index+1);
- index = dateValue.indexOf("T");
- String day;
- String hour = "0";
- String minute = "0";
- String second = "0";
- String timezone = "GMT";
- if (index != -1)
- {
- day = dateValue.substring(0,index);
- dateValue = dateValue.substring(index+1);
- index = dateValue.indexOf(":");
- if (index == -1)
- return null;
- hour = dateValue.substring(0,index);
- dateValue = dateValue.substring(index+1);
- index = dateValue.indexOf(":");
- if (index != -1)
- {
- minute = dateValue.substring(0,index);
- dateValue = dateValue.substring(index+1);
- if (dateValue.endsWith("Z"))
- {
- index = dateValue.indexOf("Z");
- if (index == -1)
- return null;
- }
- else
- {
- index = dateValue.indexOf("+");
- if (index == -1)
- index = dateValue.indexOf("-");
- if (index == -1)
- return null;
- timezone = "GMT"+dateValue.substring(index);
- }
- second = dateValue.substring(0,index);
- }
- else
- {
- minute = dateValue;
- }
- }
- else
- {
- day = dateValue;
- }
-
- // Now construct a calendar object from this
- TimeZone tz = TimeZone.getTimeZone(timezone);
-
- Calendar c = new GregorianCalendar(tz);
- try
- {
- int value = Integer.parseInt(year);
- c.set(Calendar.YEAR,value);
-
- value = Integer.parseInt(month);
- c.set(Calendar.MONTH,value-1);
-
- value = Integer.parseInt(day);
- c.set(Calendar.DAY_OF_MONTH,value);
-
- value = Integer.parseInt(hour);
- c.set(Calendar.HOUR_OF_DAY,value);
-
- value = Integer.parseInt(minute);
- c.set(Calendar.MINUTE,value);
-
- int index2 = second.indexOf(".");
- if (index2 != -1)
- second = second.substring(0,index2);
-
- value = Integer.parseInt(second);
- c.set(Calendar.SECOND,value);
-
- c.set(Calendar.MILLISECOND,0);
- return new Date(c.getTimeInMillis());
- }
- catch (NumberFormatException e)
- {
- return null;
- }
-
-
- }
-
/** Parse a China Daily News date */
protected static Date parseChinaDate(String dateValue)
{
@@ -5355,20 +5253,34 @@ public class RSSConnector extends org.ap
}
- /**
+ /** Parse ISO 8601 dates, and their common variants.
+ */
protected static Date parseISO8601Date(String isoDateValue)
{
- java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ss'Z'");
+ // There are a number of variations on the basic format.
+ // We'll look for key characters to help us determine which is which.
+ StringBuilder isoFormatString = new StringBuilder("yy");
+ if (isoDateValue.length() > 2 && isoDateValue.charAt(2) != '-')
+ isoFormatString.append("yy");
+ isoFormatString.append("-MM-dd'T'HH:mm:ss");
+ if (isoDateValue.indexOf(".") != -1)
+ isoFormatString.append(".SSS");
+ if (isoDateValue.endsWith("Z"))
+ isoFormatString.append("'Z'");
+ else
+ isoFormatString.append("Z"); // RFC 822 time, including general time zones
+ java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat(isoFormatString.toString());
try
{
return iso8601Format.parse(isoDateValue);
}
catch (java.text.ParseException e)
{
+ System.out.println("Date value: '"+isoDateValue+"'");
+ e.printStackTrace();
return null;
}
}
- */
/** Timezone mapping from RFC822 timezones to ones understood by Java */
Modified: manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java?rev=1455382&r1=1455381&r2=1455382&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java (original)
+++ manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java Tue Mar 12 01:12:51 2013
@@ -30,11 +30,15 @@ public class DateTest extends RSSConnect
public void iso8601()
throws Exception
{
- Date d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33.344Z");
+ Date d = RSSConnector.parseISO8601Date("96-11-15T01:32:33.344GMT");
+ assertNotNull(d);
+ d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33.344Z");
assertNotNull(d);
d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33Z");
assertNotNull(d);
- d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33+01");
+ d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33+0100");
+ assertNotNull(d);
+ d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33GMT-03:00");
assertNotNull(d);
}