You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2005/09/02 00:21:06 UTC
svn commit: r265794 - in /lucene/nutch/trunk: lib/commons-lang-2.1.jar
src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
Author: jerome
Date: Thu Sep 1 15:20:51 2005
New Revision: 265794
URL: http://svn.apache.org/viewcvs?rev=265794&view=rev
Log:
NUTCH-65, Handles more modification-date formats
Added:
lucene/nutch/trunk/lib/commons-lang-2.1.jar (with props)
Modified:
lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
Added: lucene/nutch/trunk/lib/commons-lang-2.1.jar
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/lib/commons-lang-2.1.jar?rev=265794&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/lib/commons-lang-2.1.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java?rev=265794&r1=265793&r2=265794&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Thu Sep 1 15:20:51 2005
@@ -53,7 +53,7 @@
import java.util.Enumeration;
import java.util.Properties;
-
+import org.apache.commons.lang.time.DateUtils;
/**
* Add (or reset) a few metaData properties as respective fields
* (if they are available), so that they can be displayed by more.jsp
@@ -133,21 +133,37 @@
try {
time = HttpDateFormat.toLong(date);
} catch (ParseException e) {
- // try to parse it as date in alternative format
- String date2 = date;
- try {
- if (date.length() > 25 ) date2 = date.substring(0, 25);
- DateFormat df = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss", Locale.US);
- time = df.parse(date2).getTime();
- } catch (Exception e1) {
- try {
- if (date.length() > 24 ) date2 = date.substring(0, 24);
- DateFormat df = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy", Locale.US);
- time = df.parse(date2).getTime();
- } catch (Exception e2) {
- LOG.warning(url + ": can't parse erroneous date: " + date);
- }
- }
+ // try to parse it as date in alternative format
+ try {
+ Date parsedDate = DateUtils.parseDate(date,
+ new String [] {
+ "EEE MMM dd HH:mm:ss yyyy",
+ "EEE MMM dd HH:mm:ss yyyy zzz",
+ "EEE, MMM dd HH:mm:ss yyyy zzz",
+ "EEE, dd MMM yyyy HH:mm:ss zzz",
+ "EEE,dd MMM yyyy HH:mm:ss zzz",
+ "EEE, dd MMM yyyy HH:mm:sszzz",
+ "EEE, dd MMM yyyy HH:mm:ss",
+ "EEE, dd-MMM-yy HH:mm:ss zzz",
+ "yyyy/MM/dd HH:mm:ss.SSS zzz",
+ "yyyy/MM/dd HH:mm:ss.SSS",
+ "yyyy/MM/dd HH:mm:ss zzz",
+ "yyyy/MM/dd",
+ "yyyy.MM.dd HH:mm:ss",
+ "yyyy-MM-dd HH:mm",
+ "MMM dd yyyy HH:mm:ss. zzz",
+ "MMM dd yyyy HH:mm:ss zzz",
+ "dd.MM.yyyy HH:mm:ss zzz",
+ "dd MM yyyy HH:mm:ss zzz",
+ "dd.MM.yyyy; HH:mm:ss",
+ "dd.MM.yyyy HH:mm:ss",
+ "dd.MM.yyyy zzz"
+ });
+ time = parsedDate.getTime();
+ // LOG.warning(url + ": parsed date: " + date +" to:"+time);
+ } catch (Exception e2) {
+ LOG.warning(url + ": can't parse erroneous date: " + date);
+ }
}
return time;
}