You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2009/05/22 22:29:46 UTC

svn commit: r777696 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java src/org/apache/pig/builtin/BinStorage.java src/org/apache/pig/data/DataReaderWriter.java

Author: pradeepkth
Date: Fri May 22 20:29:46 2009
New Revision: 777696

URL: http://svn.apache.org/viewvc?rev=777696&view=rev
Log:
PIG-814:Make Binstorage more robust when data contains record markers (pradeepkth)

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
    hadoop/pig/trunk/src/org/apache/pig/builtin/BinStorage.java
    hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=777696&r1=777695&r2=777696&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri May 22 20:29:46 2009
@@ -48,6 +48,8 @@
 
 BUG FIXES
 
+PIG-814: Make Binstorage more robust when data contains record markers (pradeepkth)
+
 PIG-811: Globs with "?" in the pattern are broken in local mode (hagleitn via
 olgan)
 

Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java?rev=777696&r1=777695&r2=777696&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java Fri May 22 20:29:46 2009
@@ -228,7 +228,7 @@
 				    spec = new ValidatingInputFileSpec(inputs.get(i).first, store);
 				}
 				boolean isSplittable = inputs.get(i).second;
-				if (isSplittable && (spec.getSlicer() instanceof PigSlicer)) {
+				if ((spec.getSlicer() instanceof PigSlicer)) {
 				    ((PigSlicer)spec.getSlicer()).setSplittable(isSplittable);
 				}
 				Slice[] pigs = spec.getSlicer().slice(store, spec.getFileName());

Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/BinStorage.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/BinStorage.java?rev=777696&r1=777695&r2=777696&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/builtin/BinStorage.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/builtin/BinStorage.java Fri May 22 20:29:46 2009
@@ -98,10 +98,17 @@
                 continue;
             }
             if(b == -1) return null;
+            b = (byte) in.read();
+            if(b != DataType.TUPLE && b != -1) {
+                continue;
+            }
+            if(b == -1) return null;
             break;
         }
         try {
-            return (Tuple)DataReaderWriter.readDatum(inData);
+            // if we got here, we have seen RECORD_1-RECORD_2-RECORD_3-TUPLE_MARKER
+            // sequence - lets now read the contents of the tuple 
+            return (Tuple)DataReaderWriter.readDatum(inData, DataType.TUPLE);
         } catch (ExecException ee) {
             throw ee;
         }

Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java?rev=777696&r1=777695&r2=777696&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java Fri May 22 20:29:46 2009
@@ -88,11 +88,14 @@
         return new String(ba, DataReaderWriter.UTF8);
     }
     
-        
     public static Object readDatum(DataInput in) throws IOException, ExecException {
         // Read the data type
         byte b = in.readByte();
-        switch (b) {
+        return readDatum(in, b);
+    }
+        
+    public static Object readDatum(DataInput in, byte type) throws IOException, ExecException {
+        switch (type) {
             case DataType.TUPLE: 
                 return bytesToTuple(in);
             
@@ -138,7 +141,7 @@
                 return null;
 
             default:
-                throw new RuntimeException("Unexpected data type " + b +
+                throw new RuntimeException("Unexpected data type " + type +
                     " found in stream.");
         }
     }