You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by jc...@apache.org on 2012/10/23 00:28:43 UTC

svn commit: r1401112 - in /pig/branches/branch-0.11: ./ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/data/ src/org/apache/pig/impl/io/ test/ test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/

Author: jcoveney
Date: Mon Oct 22 22:28:42 2012
New Revision: 1401112

URL: http://svn.apache.org/viewvc?rev=1401112&view=rev
Log:
PIG-2975: TestTypedMap.testOrderBy failing with incorrect result (knoguchi via jcoveney)

Added:
    pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java
Modified:
    pig/branches/branch-0.11/CHANGES.txt
    pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java
    pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java
    pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java
    pig/branches/branch-0.11/test/unit-tests

Modified: pig/branches/branch-0.11/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/CHANGES.txt?rev=1401112&r1=1401111&r2=1401112&view=diff
==============================================================================
--- pig/branches/branch-0.11/CHANGES.txt (original)
+++ pig/branches/branch-0.11/CHANGES.txt Mon Oct 22 22:28:42 2012
@@ -308,9 +308,7 @@ OPTIMIZATIONS
 
 BUG FIXES
 
-PIG-2950: Fix tiny documentation error in BagToString builtin (initialcontext via daijy)
-
-PIG-2967: Fix Glob_local test failure for Pig E2E Test Framework (sushantj via daijy)
+PIG-2975: TestTypedMap.testOrderBy failing with incorrect result (knoguchi via jcoveney)
 
 PIG-1283: COUNT on null bag causes failure (analog.sony via jcoveney)
 

Modified: pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java?rev=1401112&r1=1401111&r2=1401112&view=diff
==============================================================================
--- pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java (original)
+++ pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java Mon Oct 22 22:28:42 2012
@@ -21,14 +21,11 @@ import java.io.IOException;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.WritableComparator;
 import org.apache.hadoop.mapred.JobConf;
-
-import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.BinInterSedes;
 import org.apache.pig.data.DataType;
 import org.apache.pig.impl.io.NullableBytesWritable;
 import org.apache.pig.impl.util.ObjectSerializer;
@@ -37,11 +34,11 @@ public class PigBytesRawComparator exten
 
     private final Log mLog = LogFactory.getLog(getClass());
     private boolean[] mAsc;
-    private BytesWritable.Comparator mWrappedComp;
+    private WritableComparator mWrappedComp;
 
     public PigBytesRawComparator() {
         super(NullableBytesWritable.class);
-        mWrappedComp = new BytesWritable.Comparator();
+        mWrappedComp = new BinInterSedes.BinInterSedesTupleRawComparator();
     }
 
     public void setConf(Configuration conf) {
@@ -63,6 +60,7 @@ public class PigBytesRawComparator exten
             mAsc = new boolean[1];
             mAsc[0] = true;
         }
+        ((BinInterSedes.BinInterSedesTupleRawComparator)mWrappedComp).setConf(conf);
     }
 
     public Configuration getConf() {
@@ -70,17 +68,69 @@ public class PigBytesRawComparator exten
     }
 
     /**
-     * Compare two NullableBytesWritables as raw bytes.  If neither are null,
-     * then IntWritable.compare() is used.  If both are null then the indices
-     * are compared.  Otherwise the null one is defined to be less.
+     * Compare two NullableBytesWritables as raw bytes.
+     * If both are null, then the indices are compared.
+     * If neither are null
+     *    and both are bytearrays, then direct Writable.compareBytes is used.
+     *    For non-bytearrays, we use BinInterSedesTupleRawComparator.
+     * If either is null, null one is defined to be less.
      */
     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
         int rc = 0;
 
         // If either are null, handle differently.
         if (b1[s1] == 0 && b2[s2] == 0) {
-            // Subtract 2, one for null byte and one for index byte
-            rc = mWrappedComp.compare(b1, s1 + 1, l1 - 2, b2, s2 + 1, l2 - 2);
+            // Checking if both of them are ByteArrays.
+            // b1[s1+1] is always TUPLE_1 so skipping.
+            // b1[s1+2] contains the datatype followed by a datasize.
+            // No need to read the actual size since it is given from l1&l2.
+            // We just need to skip those bytes here.
+            // This shortcut is placed here for performances purposes.(PIG-2975)
+            boolean dataByteArraysCompare=true;
+            int offset1,offset2,length1,length2;
+            switch(b1[s1 + 2]) {
+              case BinInterSedes.TINYBYTEARRAY:
+                // skipping mNull, TUPLE_1, TINYBYTEARRAY(1byte), size(1byte)
+                offset1 = s1 + 4;
+                length1 = l1 - 4;
+                break;
+              case BinInterSedes.SMALLBYTEARRAY:
+                // skipping mNull, TUPLE_1, SMALLBYTEARRAY(1byte), size(2byte)
+                offset1 = s1 + 5;
+                length1 = l1 - 5;
+                break;
+              case BinInterSedes.BYTEARRAY:
+                // skipping mNull, TUPLE_1, BYTEARRAY(1byte), size(4byte)
+                offset1 = s1 + 7;
+                length1 = l1 - 7;
+                break;
+              default:
+                offset1 = length1 = 0;
+                dataByteArraysCompare = false;
+            }
+            switch(b2[s2 + 2]) {
+              case BinInterSedes.TINYBYTEARRAY:
+                offset2 = s2 + 4;
+                length2 = l2 - 4;
+                break;
+              case BinInterSedes.SMALLBYTEARRAY:
+                offset2 = s2 + 5;
+                length2 = l2 - 5;
+                break;
+              case BinInterSedes.BYTEARRAY:
+                offset2 = s2 + 7;
+                length2 = l2 - 7;
+                break;
+              default:
+                offset2 = length2 = 0;
+                dataByteArraysCompare=false;
+            }
+            if( dataByteArraysCompare ) {
+              rc = WritableComparator.compareBytes(b1, offset1, length1, b2, offset2, length2);
+            } else {
+              // Subtract 2, one for null byte and one for index byte
+              rc = mWrappedComp.compare(b1, s1 + 1, l1 - 2, b2, s2 + 1, l2 - 2);
+            }
         } else {
             // For sorting purposes two nulls are equal.
             if (b1[s1] != 0 && b2[s2] != 0) rc = 0;

Modified: pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java?rev=1401112&r1=1401111&r2=1401112&view=diff
==============================================================================
--- pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java (original)
+++ pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java Mon Oct 22 22:28:42 2012
@@ -862,11 +862,9 @@ public class BinInterSedes implements In
                 if (type1 == type2) {
                     int basz1 = readSize(bb1, dt1);
                     int basz2 = readSize(bb2, dt2);
-                    byte[] ba1 = new byte[basz1];
-                    byte[] ba2 = new byte[basz2];
-                    bb1.get(ba1);
-                    bb2.get(ba2);
-                    rc = DataByteArray.compare(ba1, ba2);
+                    rc = org.apache.hadoop.io.WritableComparator.compareBytes(
+                          bb1.array(), bb1.position(), basz1,
+                          bb2.array(), bb2.position(), basz2);
                 }
                 break;
             }

Modified: pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java?rev=1401112&r1=1401111&r2=1401112&view=diff
==============================================================================
--- pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java (original)
+++ pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java Mon Oct 22 22:28:42 2012
@@ -25,28 +25,32 @@ import org.apache.pig.data.TupleFactory;
  *
  */
 public class NullableBytesWritable extends PigNullableWritable {
+    private static final TupleFactory mTupleFactory = TupleFactory.getInstance();
 
     public NullableBytesWritable() {
-        mValue = TupleFactory.getInstance().newTuple();
+        mValue = mTupleFactory.newTuple();
     }
 
     /**
      * @param obj
      */
     public NullableBytesWritable(Object obj) {
-        mValue = TupleFactory.getInstance().newTuple();
-        ((Tuple)mValue).append(obj);
+        mValue = mTupleFactory.newTuple(1);
+        try {
+            ((Tuple)mValue).set(0, obj);
+        } catch (ExecException e) {
+            throw new RuntimeException(e);
+        }
     }
 
     public Object getValueAsPigType() {
-        if (isNull())
+        if (isNull()) {
             return null;
-        Object obj = null;
+        }
         try {
-            obj = ((Tuple)mValue).get(0);
+            return ((Tuple)mValue).get(0);
         } catch (ExecException e) {
             throw new RuntimeException(e);
         }
-        return obj;
     }
 }

Added: pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java?rev=1401112&view=auto
==============================================================================
--- pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java (added)
+++ pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java Mon Oct 22 22:28:42 2012
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer;
+
+import static junit.framework.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.util.Arrays;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.pig.data.BinInterSedes;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.impl.io.NullableBytesWritable;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+
+public class TestPigBytesRawComparator {
+  private static PigBytesRawComparator bytesRawComparator ;
+  private static ByteArrayOutputStream bas;
+
+
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    bytesRawComparator =  new PigBytesRawComparator();
+    bytesRawComparator.setConf(new JobConf());
+
+    //this bytesarray is used for holding serialized NullableBytesWritable
+    bas = new ByteArrayOutputStream(BinInterSedes.UNSIGNED_SHORT_MAX + 10);
+  }
+  @AfterClass
+  public static void setupAfterClass() throws Exception {
+    bytesRawComparator =  null;
+    bas = null;
+  }
+
+  @Test
+  public void testBoolean() throws Exception {
+    assertTrue("boolean True and False considered equal",
+        compareTwoObjectsAsNullableBytesWritables(
+                                new Boolean(true), new Boolean(false)) !=  0);
+  }
+
+
+  @Test
+  public void testSingleInt0and1() throws Exception {
+    assertTrue("Integer 0 and Integer 1 considered equal",
+        compareTwoObjectsAsNullableBytesWritables(
+                                new Integer(0), new Integer(1)) !=  0);
+  }
+
+  @Test
+  public void testSingleInt() throws Exception {
+    assertTrue("Integer 5 and Integer 7 considered equal",
+        compareTwoObjectsAsNullableBytesWritables(
+                                new Integer(5), new Integer(7)) !=  0);
+  }
+
+  @Test
+  public void testSingleLong0and1() throws Exception {
+     assertTrue("Long 0 and Long 1 considered equal",
+        compareTwoObjectsAsNullableBytesWritables(
+                                new Long(0), new Long(1)) != 0);
+  }
+
+  @Test
+  public void testSingleLong() throws Exception {
+    assertTrue("Long 5 and Long 7 considered equal",
+        compareTwoObjectsAsNullableBytesWritables(
+                                new Long(5), new Long(7)) !=  0);
+  }
+
+  @Test
+  public void testSingleByte() throws Exception {
+    assertTrue("Byte 5 and Byte 7 considered equal",
+        compareTwoObjectsAsNullableBytesWritables(
+                                new Byte((byte)5), new Byte((byte)7)) !=  0);
+  }
+
+  @Test
+  public void testSingleByteDiffByteArray() throws Exception {
+     assertTrue("'ab' and 'ac' considered equal",
+        compareTwoObjectsAsNullableBytesWritables(
+                           new DataByteArray(new byte[] {'a','b'}),
+                           new DataByteArray(new byte[] {'a','c'})) != 0);
+     assertTrue("'ab' and 'abc' considered equal",
+        compareTwoObjectsAsNullableBytesWritables(
+                           new DataByteArray(new byte[] {'a','b'}),
+                           new DataByteArray(new byte[] {'a','b','c'})) != 0);
+     assertTrue("'ab' and 'cb' considered equal",
+        compareTwoObjectsAsNullableBytesWritables(
+                           new DataByteArray(new byte[] {'a','b'}),
+                           new DataByteArray(new byte[] {'c','b'})) != 0);
+     assertTrue("'a' and 'b' considered equal",
+        compareTwoObjectsAsNullableBytesWritables(
+                           new DataByteArray(new byte[] {'a'}),
+                           new DataByteArray(new byte[] {'b'})) != 0);
+  }
+
+  @Test
+  public void testDifferentType() throws Exception {
+     assertTrue("Integer 9999 and Long 9999 considered equal",
+        compareTwoObjectsAsNullableBytesWritables(new Integer(9999), new Long(9999)) != 0 );
+  }
+
+  @Test
+  public void testByteArrayAlphabeticalOrderingByLength() throws Exception {
+    // making sure we order as
+    // '1'
+    // '2'
+    // '22'
+    // '222' and repeats...
+    // If compare includes the header, order suddenly changes when header
+    // changes from INTEGER_INSHORT to INTEGER.
+    // Here, we're making sure that doesn't happen.
+    //
+    for(int i = BinInterSedes.UNSIGNED_BYTE_MAX - 3 ;
+            i <  BinInterSedes.UNSIGNED_BYTE_MAX + 3; i++){
+      DataByteArray dba1 = createRepeatedByteArray((byte)'2', i);
+      DataByteArray dba2 = createRepeatedByteArray((byte)'2', i + 1);
+      assertTrue("ByteArray order changed at UNSIGNED_BYTE_MAX boundary",
+                 compareTwoObjectsAsNullableBytesWritables(dba1, dba2) < 0);
+    }
+    for(int i = BinInterSedes.UNSIGNED_SHORT_MAX -3 ;
+            i <  BinInterSedes.UNSIGNED_SHORT_MAX + 3; i++){
+      DataByteArray dba1 = createRepeatedByteArray((byte)'2', i);
+      DataByteArray dba2 = createRepeatedByteArray((byte)'2', i + 1);
+      assertTrue("ByteArray order changed at UNSIGNED_SHORT_MAX boundary",
+                 compareTwoObjectsAsNullableBytesWritables(dba1, dba2) < 0);
+    }
+  }
+
+  @Test
+  public void testLongByteArrays() throws Exception {
+    for(int length: new int [] {BinInterSedes.UNSIGNED_BYTE_MAX + 50,
+                                BinInterSedes.UNSIGNED_SHORT_MAX + 50} ) {
+      // To make sure that offset & length are set correctly for
+      // BinInterSedes.SMALLBYTEARRAY and BinInterSedes.BYTEARRAY,
+      // create a long bytearray and compare with first or last byte changed
+
+      byte [] ba1 = new byte[length];
+      byte [] ba2 = new byte[length];
+      Arrays.fill(ba1, 0, length, (byte)'a');
+      Arrays.fill(ba2, 0, length, (byte)'a');
+      //changing only the last byte
+      ba2[length-1] = 'b';
+      assertTrue("ByteArray with length: " + length
+                 + " compare failed with the last byte",
+                 compareTwoObjectsAsNullableBytesWritables(
+                      new DataByteArray(ba1), new DataByteArray(ba2)) != 0);
+      //setting back
+      ba2[length-1] = 'a';
+
+      //now changing the first byte
+      ba2[0] = 'b';
+      assertTrue("ByteArray with length: " + length
+                 + " compare failed with the first byte",
+                 compareTwoObjectsAsNullableBytesWritables(
+                      new DataByteArray(ba1), new DataByteArray(ba2)) != 0);
+    }
+  }
+
+  private DataByteArray createRepeatedByteArray(byte c, int num) {
+    byte [] ba = new byte[num];
+    Arrays.fill(ba, 0, num, c);
+    return new DataByteArray(ba);
+  }
+
+  // Wrap the passed object with NullableBytesWritable and return the serialized
+  // form.
+  private byte [] serializeAsNullableBytesWritable(Object obj) throws Exception {
+    NullableBytesWritable nbw = new NullableBytesWritable(obj);
+    bas.reset();
+    DataOutput dout = new DataOutputStream(bas);
+    nbw.write(dout);
+    return bas.toByteArray();
+  }
+
+  // Take two objects wrapped by NullableBytesWritable and comare using
+  // PigBytesRawComparator
+  private int compareTwoObjectsAsNullableBytesWritables(Object obj1, Object obj2) throws Exception {
+    byte [] ba1 = serializeAsNullableBytesWritable(obj1);
+    byte [] ba2 = serializeAsNullableBytesWritable(obj2);
+    return bytesRawComparator.compare(ba1, 0, ba1.length, ba2, 0, ba2.length);
+  }
+}

Modified: pig/branches/branch-0.11/test/unit-tests
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/unit-tests?rev=1401112&r1=1401111&r2=1401112&view=diff
==============================================================================
--- pig/branches/branch-0.11/test/unit-tests (original)
+++ pig/branches/branch-0.11/test/unit-tests Mon Oct 22 22:28:42 2012
@@ -44,6 +44,7 @@
 **/TestNull.java
 **/TestPackage.java
 **/TestParamSubPreproc.java
+**/TestPigBytesRawComparator.java
 **/TestPhyOp.java
 **/TestPigScriptParser.java
 **/TestPigSplit.java