You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this plain-text copy.
Posted to commits@nutch.apache.org by le...@apache.org on 2012/11/07 19:47:54 UTC
svn commit: r1406749 - in /nutch/branches/2.x: CHANGES.txt
src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
Author: lewismc
Date: Wed Nov 7 18:47:54 2012
New Revision: 1406749
URL: http://svn.apache.org/viewvc?rev=1406749&view=rev
Log:
NUTCH-1493 Error adding field 'contentLength'='' during solrindex using index-more
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1406749&r1=1406748&r2=1406749&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Wed Nov 7 18:47:54 2012
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.2 - Current Development
+* NUTCH-1493 Error adding field 'contentLength'='' during solrindex using index-more (Nathan Gass via lewismc)
+
* NUTCH-1491 Strip UTF-8 non-character codepoints in title (Nathan Gass via markus)
* NUTCH-1421 RegexURLNormalizer to only skip rules with invalid patterns (snagel)
Modified: nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java?rev=1406749&r1=1406748&r2=1406749&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java (original)
+++ nutch/branches/2.x/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Wed Nov 7 18:47:54 2012
@@ -139,9 +139,12 @@ public class MoreIndexingFilter implemen
private NutchDocument addLength(NutchDocument doc, WebPage page, String url) {
Utf8 contentLength = page.getFromHeaders(new Utf8(
HttpHeaders.CONTENT_LENGTH));
- if (contentLength != null)
+ if (contentLength != null) {
// NUTCH-1010 ContentLength not trimmed
- doc.add("contentLength", contentLength.toString().trim());
+ String trimmed = contentLength.toString().trim();
+ if (!trimmed.isEmpty())
+ doc.add("contentLength", trimmed);
+ }
return doc;
}