You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by dv...@apache.org on 2012/09/03 00:37:31 UTC

svn commit: r1380078 - in /pig/trunk: CHANGES.txt src/org/apache/pig/builtin/Utf8StorageConverter.java

Author: dvryaboy
Date: Sun Sep  2 22:37:30 2012
New Revision: 1380078

URL: http://svn.apache.org/viewvc?rev=1380078&view=rev
Log:
PIG-2835: Optimizing the conversion from bytes to Integer/Long

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1380078&r1=1380077&r2=1380078&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sun Sep  2 22:37:30 2012
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
 
 IMPROVEMENTS
 
+PIG-2835: Optimizing the conversion from bytes to Integer/Long (jay23jack via dvryaboy)
+
 PIG-2886: Add Scan TimeRange to HBaseStorage (ted.m via dvryaboy)
 
 PIG-2895: jodatime jar missing in pig-withouthadoop.jar  (thejas)

Modified: pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java?rev=1380078&r1=1380077&r2=1380078&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java Sun Sep  2 22:37:30 2012
@@ -398,15 +398,41 @@ public class Utf8StorageConverter implem
             return null;
         }
     }
+    
+    /**
+     * Sanity check of whether this number is a valid integer or long. 
+     * @param number the number to check
+     * @return true if it contains no invalid characters, i.e. only digits, optionally preceded by a single leading '-'
+     */
+    private static boolean sanityCheckIntegerLong(String number){
+        for (int i=0; i < number.length(); i++){
+            if (number.charAt(i) >= '0' && number.charAt(i) <='9' || i == 0 && number.charAt(i) == '-'){
+                // valid one
+            }
+            else{
+                // contains invalid characters, so it cannot be an integer or long.
+                return false;
+            }
+        }
+        return true;
+    }
 
     @Override
     public Integer bytesToInteger(byte[] b) throws IOException {
         if(b == null)
             return null;
         String s = new String(b);
-        try {
-            return Integer.valueOf(s);
-        } catch (NumberFormatException nfe) {
+        Integer ret = null;
+        
+        // See PIG-2835. Relying on exception handling to detect a non-integer (e.g. double) value is very expensive,
+        // so we run a cheap character-level sanity check before attempting to parse.
+        if (sanityCheckIntegerLong(s)){
+            try {
+                ret = Integer.valueOf(s);
+            } catch (NumberFormatException nfe) {
+            }
+        }
+        if (ret == null){
             // It's possible that this field can be interpreted as a double.
             // Unfortunately Java doesn't handle this in Integer.valueOf.  So
             // we need to try to convert it to a double and if that works then
@@ -424,11 +450,12 @@ public class Utf8StorageConverter implem
             } catch (NumberFormatException nfe2) {
                 LogUtils.warn(this, "Unable to interpret value " + Arrays.toString(b) + " in field being " +
                         "converted to int, caught NumberFormatException <" +
-                        nfe.getMessage() + "> field discarded", 
+                        nfe2.getMessage() + "> field discarded", 
                         PigWarning.FIELD_DISCARDED_TYPE_CONVERSION_FAILED, mLog);
                 return null;
             }
         }
+        return ret;
     }
     
     @Override
@@ -442,9 +469,17 @@ public class Utf8StorageConverter implem
             s = new String(b);
         }
         
-        try {
-            return Long.valueOf(s);
-        } catch (NumberFormatException nfe) {
+        // See PIG-2835. Relying on exception handling to detect a non-integer (e.g. double) value is very expensive,
+        // so we run a cheap character-level sanity check before attempting to parse.
+        Long ret = null;
+        if (sanityCheckIntegerLong(s)) {
+            try {
+                ret = Long.valueOf(s);
+            } catch (NumberFormatException nfe) {
+            }
+        }
+        
+        if (ret == null) {
             // It's possible that this field can be interpreted as a double.
             // Unfortunately Java doesn't handle this in Long.valueOf.  So
             // we need to try to convert it to a double and if that works then
@@ -462,11 +497,12 @@ public class Utf8StorageConverter implem
             } catch (NumberFormatException nfe2) {
                 LogUtils.warn(this, "Unable to interpret value " + Arrays.toString(b) + " in field being " +
                             "converted to long, caught NumberFormatException <" +
-                            nfe.getMessage() + "> field discarded", 
+                            nfe2.getMessage() + "> field discarded", 
                             PigWarning.FIELD_DISCARDED_TYPE_CONVERSION_FAILED, mLog);
                 return null;
             }
         }
+        return ret;
     }
 
     @Override