You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2005/08/04 23:41:33 UTC

svn commit: r227501 - /lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java

Author: ab
Date: Thu Aug  4 14:41:18 2005
New Revision: 227501

URL: http://svn.apache.org/viewcvs?rev=227501&view=rev
Log:
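Apply patches from NUTCH-65, submitted by Lutischan Ferenc: when HttpDateFormat cannot parse a date, retry with two alternative date formats (using Locale.US) before logging a warning.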

Modified:
    lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java

Modified: lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java?rev=227501&r1=227500&r2=227501&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Thu Aug  4 14:41:18 2005
@@ -48,6 +48,7 @@
 import java.text.SimpleDateFormat;
 
 import java.util.Date;
+import java.util.Locale;
 import java.util.TimeZone;
 import java.util.Enumeration;
 import java.util.Properties;
@@ -131,14 +132,21 @@
     long time = -1;
     try {
       time = HttpDateFormat.toLong(date);
-    } catch  (ParseException e) {
+    } catch (ParseException e) {
       // try to parse it as date in alternative format
+      String date2 = date;
       try {
-        DateFormat df = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy zzz");
-        Date d = df.parse(date);
-        time = d.getTime();
+        if (date.length() > 25 ) date2 = date.substring(0, 25);
+        DateFormat df = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss", Locale.US);
+        time = df.parse(date2).getTime();
       } catch (Exception e1) {
-        LOG.warning(url+": can't parse erroneous date: "+date);
+        try {
+          if (date.length() > 24 ) date2 = date.substring(0, 24);
+          DateFormat df = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy", Locale.US);
+          time = df.parse(date2).getTime();
+        } catch (Exception e2) {
+          LOG.warning(url + ": can't parse erroneous date: " + date);
+        }
       }
     }
     return time;