You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2005/09/02 00:21:06 UTC

svn commit: r265794 - in /lucene/nutch/trunk: lib/commons-lang-2.1.jar src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java

Author: jerome
Date: Thu Sep  1 15:20:51 2005
New Revision: 265794

URL: http://svn.apache.org/viewcvs?rev=265794&view=rev
Log:
NUTCH-65, Handles more modification-date formats

Added:
    lucene/nutch/trunk/lib/commons-lang-2.1.jar   (with props)
Modified:
    lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java

Added: lucene/nutch/trunk/lib/commons-lang-2.1.jar
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/lib/commons-lang-2.1.jar?rev=265794&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/commons-lang-2.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java?rev=265794&r1=265793&r2=265794&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Thu Sep  1 15:20:51 2005
@@ -53,7 +53,7 @@
 import java.util.Enumeration;
 import java.util.Properties;
 
-
+import org.apache.commons.lang.time.DateUtils;
 /**
  * Add (or reset) a few metaData properties as respective fields
  * (if they are available), so that they can be displayed by more.jsp
@@ -133,21 +133,37 @@
     try {
       time = HttpDateFormat.toLong(date);
     } catch (ParseException e) {
-      // try to parse it as date in alternative format
-      String date2 = date;
-      try {
-        if (date.length() > 25 ) date2 = date.substring(0, 25);
-        DateFormat df = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss", Locale.US);
-        time = df.parse(date2).getTime();
-      } catch (Exception e1) {
-        try {
-          if (date.length() > 24 ) date2 = date.substring(0, 24);
-          DateFormat df = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy", Locale.US);
-          time = df.parse(date2).getTime();
-        } catch (Exception e2) {
-          LOG.warning(url + ": can't parse erroneous date: " + date);
-        }
-      }
+	// try to parse it as date in alternative format
+	try {
+	    Date parsedDate = DateUtils.parseDate(date,
+		  new String [] {
+		      "EEE MMM dd HH:mm:ss yyyy",
+		      "EEE MMM dd HH:mm:ss yyyy zzz",
+		      "EEE, MMM dd HH:mm:ss yyyy zzz",
+		      "EEE, dd MMM yyyy HH:mm:ss zzz",
+		      "EEE,dd MMM yyyy HH:mm:ss zzz",
+		      "EEE, dd MMM yyyy HH:mm:sszzz",
+		      "EEE, dd MMM yyyy HH:mm:ss",
+		      "EEE, dd-MMM-yy HH:mm:ss zzz",
+		      "yyyy/MM/dd HH:mm:ss.SSS zzz",
+		      "yyyy/MM/dd HH:mm:ss.SSS",
+		      "yyyy/MM/dd HH:mm:ss zzz",
+		      "yyyy/MM/dd",
+		      "yyyy.MM.dd HH:mm:ss",
+		      "yyyy-MM-dd HH:mm",
+		      "MMM dd yyyy HH:mm:ss. zzz",
+		      "MMM dd yyyy HH:mm:ss zzz",
+		      "dd.MM.yyyy HH:mm:ss zzz",
+		      "dd MM yyyy HH:mm:ss zzz",
+		      "dd.MM.yyyy; HH:mm:ss",
+		      "dd.MM.yyyy HH:mm:ss",
+		      "dd.MM.yyyy zzz"
+		  });
+	    time = parsedDate.getTime();
+	    //	    LOG.warning(url + ": parsed date: " + date +" to:"+time);
+	} catch (Exception e2) {
+	    LOG.warning(url + ": can't parse erroneous date: " + date);
+	}
     }
     return time;
   }