You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2011/04/28 11:57:12 UTC

svn commit: r1097390 - in /nutch/branches/branch-1.3: CHANGES.txt src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java

Author: markus
Date: Thu Apr 28 09:57:12 2011
New Revision: 1097390

URL: http://svn.apache.org/viewvc?rev=1097390&view=rev
Log:
NUTCH-986 Dedup fails due to incorrect date format

Modified:
    nutch/branches/branch-1.3/CHANGES.txt
    nutch/branches/branch-1.3/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java

Modified: nutch/branches/branch-1.3/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.3/CHANGES.txt?rev=1097390&r1=1097389&r2=1097390&view=diff
==============================================================================
--- nutch/branches/branch-1.3/CHANGES.txt (original)
+++ nutch/branches/branch-1.3/CHANGES.txt Thu Apr 28 09:57:12 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.3 - 4/21/2011
 
+* NUTCH-986 SolrDedup fails due to incorrect date format (markus)
+
 * NUTCH-977 SolrMappingReader uses hardcoded configuration parameter name for mapping file (markus)
 
 * NUTCH-976 Rename properties solrindex.* to solr.* (markus)

Modified: nutch/branches/branch-1.3/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.3/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java?rev=1097390&r1=1097389&r2=1097390&view=diff
==============================================================================
--- nutch/branches/branch-1.3/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java (original)
+++ nutch/branches/branch-1.3/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java Thu Apr 28 09:57:12 2011
@@ -21,7 +21,9 @@ import java.io.DataOutput;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.text.SimpleDateFormat;
+import java.text.DateFormat;
 import java.util.Iterator;
+import java.util.Date;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -126,7 +128,15 @@ Tool {
     public void readSolrDocument(SolrDocument doc) {
       id = (String)doc.getFieldValue(SolrConstants.ID_FIELD);
       boost = (Float)doc.getFieldValue(SolrConstants.BOOST_FIELD);
-      tstamp = (Long)doc.getFieldValue(SolrConstants.TIMESTAMP_FIELD);
+
+      // Attempt to convert Solr formatted date to internally used long
+      try {
+        DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
+        Date date = (Date)formatter.parse((String)doc.getFieldValue(SolrConstants.TIMESTAMP_FIELD));
+        tstamp = (Long)date.getTime();
+      } catch (Exception e) {
+        LOG.error("Could not convert date to long: " + e);
+      }
     }
 
     public void readFields(DataInput in) throws IOException {