You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2012/11/06 10:17:38 UTC

svn commit: r1406077 - in /nutch/branches/2.x: CHANGES.txt src/java/org/apache/nutch/indexer/solr/SolrWriter.java

Author: markus
Date: Tue Nov  6 09:17:38 2012
New Revision: 1406077

URL: http://svn.apache.org/viewvc?rev=1406077&view=rev
Log:
NUTCH-1491 Strip UTF-8 non-character codepoints in title

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1406077&r1=1406076&r2=1406077&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Tue Nov  6 09:17:38 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.2 - Current Development
 
+* NUTCH-1491 Strip UTF-8 non-character codepoints in title (Nathan Gass via markus)
+
 * NUTCH-1421 RegexURLNormalizer to only skip rules with invalid patterns (snagel)
 
 * NUTCH-1433 Upgrade to Tika 1.2 (jnioche)

Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java?rev=1406077&r1=1406076&r2=1406077&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/solr/SolrWriter.java Tue Nov  6 09:17:38 2012
@@ -60,7 +60,7 @@ public class SolrWriter implements Nutch
       for (final String val : e.getValue()) {
 
         Object val2 = val;
-        if (e.getKey().equals("content")) {
+        if (e.getKey().equals("content") || e.getKey().equals("title")) {
           val2 = stripNonCharCodepoints((String)val);
         }