You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2013/06/17 21:16:31 UTC
svn commit: r1493892 - in /nutch/branches/2.x: CHANGES.txt
src/java/org/apache/nutch/util/StringUtil.java
src/java/org/apache/nutch/util/TableUtil.java
Author: lewismc
Date: Mon Jun 17 19:16:31 2013
New Revision: 1493892
URL: http://svn.apache.org/r1493892
Log:
NUTCH-1420 Get rid of the dreaded �
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java
nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1493892&r1=1493891&r2=1493892&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Jun 17 19:16:31 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1420 Get rid of the dreaded � (markus + lewismc)
+
* NUTCH-1578 Upgrade to Hadoop 1.2.0 (markus)
* NUTCH-1522 Upgrade to Tika 1.3 (jnioche)
Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java?rev=1493892&r1=1493891&r2=1493892&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/StringUtil.java Mon Jun 17 19:16:31 2013
@@ -125,6 +125,16 @@ public class StringUtil {
public static boolean isEmpty(String str) {
return (str == null) || (str.equals(""));
}
+
+
+ /**
+ * Takes in a String value and cleans out any offending "�"
+ * @param value the dirty String value.
+ * @return clean String
+ */
+ public static String cleanField(String value) {
+ return value.replaceAll("�", "");
+ }
public static void main(String[] args) {
if (args.length != 1)
Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java?rev=1493892&r1=1493891&r2=1493892&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java Mon Jun 17 19:16:31 2013
@@ -22,6 +22,7 @@ import java.nio.ByteBuffer;
import org.apache.avro.util.Utf8;
import org.apache.commons.lang.StringUtils;
+import org.apache.nutch.util.StringUtil;
public class TableUtil {
@@ -144,14 +145,15 @@ public class TableUtil {
/**
- * Convert given Utf8 instance to String
+ * Convert given Utf8 instance to String and clean out
+ * any offending "�" from the String.
*
* @param utf8
* Utf8 object
* @return string-ifed Utf8 object or null if Utf8 instance is null
*/
public static String toString(Utf8 utf8) {
- return (utf8 == null ? null : utf8.toString());
+ return (utf8 == null ? null : StringUtil.cleanField(utf8.toString()));
}
}