You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2008/07/17 19:05:07 UTC

svn commit: r677638 - in /incubator/pig/branches/types: src/org/apache/pig/builtin/Utf8StorageConverter.java src/org/apache/pig/impl/mapReduceLayer/PigMapReduce.java test/org/apache/pig/test/TestConversions.java

Author: gates
Date: Thu Jul 17 10:05:06 2008
New Revision: 677638

URL: http://svn.apache.org/viewvc?rev=677638&view=rev
Log:
PIG-312 When casting to an int or long, if the cast fails, try to cast to a double and then to int or long.


Modified:
    incubator/pig/branches/types/src/org/apache/pig/builtin/Utf8StorageConverter.java
    incubator/pig/branches/types/src/org/apache/pig/impl/mapReduceLayer/PigMapReduce.java
    incubator/pig/branches/types/test/org/apache/pig/test/TestConversions.java

Modified: incubator/pig/branches/types/src/org/apache/pig/builtin/Utf8StorageConverter.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/builtin/Utf8StorageConverter.java?rev=677638&r1=677637&r2=677638&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/builtin/Utf8StorageConverter.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/builtin/Utf8StorageConverter.java Thu Jul 17 10:05:06 2008
@@ -50,6 +50,9 @@
     protected BagFactory mBagFactory = BagFactory.getInstance();
     protected TupleFactory mTupleFactory = TupleFactory.getInstance();
     protected final Log mLog = LogFactory.getLog(getClass());
+
+    private Integer mMaxInt = new Integer(Integer.MAX_VALUE);
+    private Long mMaxLong = new Long(Long.MAX_VALUE);
         
     public Utf8StorageConverter() {
     }
@@ -86,24 +89,54 @@
     }
 
     public Integer bytesToInteger(byte[] b) throws IOException {
+        String s = new String(b);
         try {
-            return Integer.valueOf(new String(b));
+            return Integer.valueOf(s);
         } catch (NumberFormatException nfe) {
-            mLog.warn("Unable to interpret value " + b + " in field being " +
-                    "converted to int, caught NumberFormatException <" +
-                    nfe.getMessage() + "> field discarded");
-            return null;
+            // It's possible that this field can be interpreted as a double.
+            // Unfortunately Java doesn't handle this in Integer.valueOf.  So
+            // we need to try to convert it to a double and if that works then
+            // go to an int.
+            try {
+                Double d = Double.valueOf(s);
+                // Need to check for an overflow error
+                if (d.doubleValue() > mMaxInt.doubleValue() + 1.0) {
+                    mLog.warn("Value " + d + " too large for integer");
+                    return null;
+                }
+                return new Integer(d.intValue());
+            } catch (NumberFormatException nfe2) {
+                mLog.warn("Unable to interpret value " + b + " in field being " +
+                        "converted to int, caught NumberFormatException <" +
+                        nfe.getMessage() + "> field discarded");
+                return null;
+            }
         }
     }
 
     public Long bytesToLong(byte[] b) throws IOException {
+        String s = new String(b);
         try {
-            return Long.valueOf(new String(b));
+            return Long.valueOf(s);
         } catch (NumberFormatException nfe) {
-            mLog.warn("Unable to interpret value " + b + " in field being " +
-                    "converted to long, caught NumberFormatException <" +
-                    nfe.getMessage() + "> field discarded");
-            return null;
+            // It's possible that this field can be interpreted as a double.
+            // Unfortunately Java doesn't handle this in Long.valueOf.  So
+            // we need to try to convert it to a double and if that works then
+            // go to a long.
+            try {
+                Double d = Double.valueOf(s);
+                // Need to check for an overflow error
+                if (d.doubleValue() > mMaxLong.doubleValue() + 1.0) {
+                    mLog.warn("Value " + d + " too large for integer");
+                    return null;
+                }
+                return new Long(d.longValue());
+            } catch (NumberFormatException nfe2) {
+                mLog.warn("Unable to interpret value " + b + " in field being " +
+                        "converted to long, caught NumberFormatException <" +
+                        nfe.getMessage() + "> field discarded");
+                return null;
+            }
         }
     }
 

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/mapReduceLayer/PigMapReduce.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/mapReduceLayer/PigMapReduce.java?rev=677638&r1=677637&r2=677638&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/mapReduceLayer/PigMapReduce.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/mapReduceLayer/PigMapReduce.java Thu Jul 17 10:05:06 2008
@@ -46,7 +46,7 @@
 import org.apache.pig.impl.util.ObjectSerializer;
 
 /**
- * This class is the static Mapper & Reducer classes that
+ * This class is the static Mapper &amp; Reducer classes that
  * are used by Pig to execute Pig Map Reduce jobs. Since
  * there is a reduce phase, the leaf is bound to be a 
  * POLocalRearrange. So the map phase has to separate the
@@ -54,10 +54,10 @@
  * collector.
  * 
  * The shuffle and sort phase sorts these key &amp; indexed tuples
- * and creates key, List<IndexedTuple> and passes the key and
+ * and creates key, List&lt;IndexedTuple&gt; and passes the key and
  * iterator to the list. The deserialized POPackage operator
- * is used to package the key, List<IndexedTuple> into pigKey, 
- * Bag<Tuple> where pigKey is of the appropriate pig type and
+ * is used to package the key, List&lt;IndexedTuple&gt; into pigKey, 
+ * Bag&lt;Tuple&gt; where pigKey is of the appropriate pig type and
  * then the result of the package is attached to the reduce
  * plan which is executed if its not empty. Either the result 
  * of the reduce plan or the package res is collected into

Modified: incubator/pig/branches/types/test/org/apache/pig/test/TestConversions.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/test/org/apache/pig/test/TestConversions.java?rev=677638&r1=677637&r2=677638&view=diff
==============================================================================
--- incubator/pig/branches/types/test/org/apache/pig/test/TestConversions.java (original)
+++ incubator/pig/branches/types/test/org/apache/pig/test/TestConversions.java Thu Jul 17 10:05:06 2008
@@ -38,8 +38,8 @@
     public  void testBytesToInteger() throws IOException
     {
         // valid ints
-        String[] a = {"1", "-2345",  "1234567"};
-        Integer[] ia = {1, -2345, 1234567};
+        String[] a = {"1", "-2345",  "1234567", "1.1", "-23.45"};
+        Integer[] ia = {1, -2345, 1234567, 1, -23};
         
         for (int i = 0; i < ia.length; i++) {
             byte[] b = a[i].getBytes();
@@ -47,7 +47,7 @@
         }
         
         // invalid ints
-        a = new String[]{"1.1", "-23.45",  "1234567890123456", "This is an int"};
+        a = new String[]{"1234567890123456", "This is an int"};
         for (String s : a) {
             byte[] b = s.getBytes();
             Integer i = ps.bytesToInteger(b);
@@ -101,8 +101,8 @@
     public  void testBytesToLong() throws IOException
     {
         // valid Longs
-        String[] a = {"1", "-2345",  "123456789012345678"};
-        Long[] la = {1L, -2345L, 123456789012345678L};
+        String[] a = {"1", "-2345",  "123456789012345678", "1.1", "-23.45"};
+        Long[] la = {1L, -2345L, 123456789012345678L, 1L, -23L};
         
         for (int i = 0; i < la.length; i++) {
             byte[] b = a[i].getBytes();
@@ -110,7 +110,7 @@
         }
         
         // invalid longs
-        a = new String[]{"1.1", "-23.45",  "This is a long"};
+        a = new String[]{"This is a long", "1.0e1000"};
         for (String s : a) {
             byte[] b = s.getBytes();
             Long l = ps.bytesToLong(b);