You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by dv...@apache.org on 2012/09/03 00:37:31 UTC
svn commit: r1380078 - in /pig/trunk: CHANGES.txt
src/org/apache/pig/builtin/Utf8StorageConverter.java
Author: dvryaboy
Date: Sun Sep 2 22:37:30 2012
New Revision: 1380078
URL: http://svn.apache.org/viewvc?rev=1380078&view=rev
Log:
PIG-2835: Optimizing the conversion from bytes to Integer/Long
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1380078&r1=1380077&r2=1380078&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sun Sep 2 22:37:30 2012
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-2835: Optimizing the conversion from bytes to Integer/Long (jay23jack via dvryaboy)
+
PIG-2886: Add Scan TimeRange to HBaseStorage (ted.m via dvryaboy)
PIG-2895: jodatime jar missing in pig-withouthadoop.jar (thejas)
Modified: pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java?rev=1380078&r1=1380077&r2=1380078&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java Sun Sep 2 22:37:30 2012
@@ -398,15 +398,41 @@ public class Utf8StorageConverter implem
return null;
}
}
+
+ /**
+ * Cheap character-level pre-check of whether a string could be an integer or long.
+ * @param number the string to check
+ * @return true if every character is '0'-'9', except that the first may be '-' (so "" and "-" also pass)
+ */
+ private static boolean sanityCheckIntegerLong(String number){
+ for (int i=0; i < number.length(); i++){
+ if (number.charAt(i) >= '0' && number.charAt(i) <='9' || i == 0 && number.charAt(i) == '-'){
+ // an ASCII digit, or a minus sign in the first position: keep scanning
+ }
+ else{
+ // any other character means this cannot be an integer or a long.
+ return false;
+ }
+ }
+ return true;
+ }
@Override
public Integer bytesToInteger(byte[] b) throws IOException {
if(b == null)
return null;
String s = new String(b);
- try {
- return Integer.valueOf(s);
- } catch (NumberFormatException nfe) {
+ Integer ret = null;
+
+ // See PIG-2835. Using exception handling to check if it's a double is very expensive.
+ // So we write our sanity check.
+ if (sanityCheckIntegerLong(s)){
+ try {
+ ret = Integer.valueOf(s);
+ } catch (NumberFormatException nfe) {
+ }
+ }
+ if (ret == null){
// It's possible that this field can be interpreted as a double.
// Unfortunately Java doesn't handle this in Integer.valueOf. So
// we need to try to convert it to a double and if that works then
@@ -424,11 +450,12 @@ public class Utf8StorageConverter implem
} catch (NumberFormatException nfe2) {
LogUtils.warn(this, "Unable to interpret value " + Arrays.toString(b) + " in field being " +
"converted to int, caught NumberFormatException <" +
- nfe.getMessage() + "> field discarded",
+ nfe2.getMessage() + "> field discarded",
PigWarning.FIELD_DISCARDED_TYPE_CONVERSION_FAILED, mLog);
return null;
}
}
+ return ret;
}
@Override
@@ -442,9 +469,17 @@ public class Utf8StorageConverter implem
s = new String(b);
}
- try {
- return Long.valueOf(s);
- } catch (NumberFormatException nfe) {
+ // See PIG-2835. Using exception handling to check if it's a double is very expensive.
+ // So we write our sanity check.
+ Long ret = null;
+ if (sanityCheckIntegerLong(s)) {
+ try {
+ ret = Long.valueOf(s);
+ } catch (NumberFormatException nfe) {
+ }
+ }
+
+ if (ret == null) {
// It's possible that this field can be interpreted as a double.
// Unfortunately Java doesn't handle this in Long.valueOf. So
// we need to try to convert it to a double and if that works then
@@ -462,11 +497,12 @@ public class Utf8StorageConverter implem
} catch (NumberFormatException nfe2) {
LogUtils.warn(this, "Unable to interpret value " + Arrays.toString(b) + " in field being " +
"converted to long, caught NumberFormatException <" +
- nfe.getMessage() + "> field discarded",
+ nfe2.getMessage() + "> field discarded",
PigWarning.FIELD_DISCARDED_TYPE_CONVERSION_FAILED, mLog);
return null;
}
}
+ return ret;
}
@Override