You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2013/06/17 21:16:31 UTC

svn commit: r1493892 - in /nutch/branches/2.x: CHANGES.txt src/java/org/apache/nutch/util/StringUtil.java src/java/org/apache/nutch/util/TableUtil.java

Author: lewismc
Date: Mon Jun 17 19:16:31 2013
New Revision: 1493892

URL: http://svn.apache.org/r1493892
Log:
NUTCH-1420 Get rid of the dreaded �

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java
    nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1493892&r1=1493891&r2=1493892&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Jun 17 19:16:31 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1420 Get rid of the dreaded � (markus + lewismc)
+
 * NUTCH-1578 Upgrade to Hadoop 1.2.0 (markus)
 
 * NUTCH-1522 Upgrade to Tika 1.3 (jnioche)

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java?rev=1493892&r1=1493891&r2=1493892&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java Mon Jun 17 19:16:31 2013
@@ -125,6 +125,16 @@ public class StringUtil {
   public static boolean isEmpty(String str) {
     return (str == null) || (str.equals(""));
   }
+  
+
+  /**
+   * Takes in a String value and cleans out any offending "�"
+   * @param value the dirty String value.
+   * @return clean String
+   */
+  public static String cleanField(String value) {
+    return value.replaceAll("�", "");
+  }
 
   public static void main(String[] args) {
     if (args.length != 1)

Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java?rev=1493892&r1=1493891&r2=1493892&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java Mon Jun 17 19:16:31 2013
@@ -22,6 +22,7 @@ import java.nio.ByteBuffer;
 
 import org.apache.avro.util.Utf8;
 import org.apache.commons.lang.StringUtils;
+import org.apache.nutch.util.StringUtil;
 
 public class TableUtil {
 
@@ -144,14 +145,15 @@ public class TableUtil {
   
   
   /**
-   * Convert given Utf8 instance to String
+   * Convert given Utf8 instance to String and clean out 
+   * any offending "�" from the String.
    *
    * @param utf8
    *          Utf8 object
    * @return string-ifed Utf8 object or null if Utf8 instance is null
    */
   public static String toString(Utf8 utf8) {
-    return (utf8 == null ? null : utf8.toString());
+    return (utf8 == null ? null : StringUtil.cleanField(utf8.toString()));
   }
 
 }