You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by jc...@apache.org on 2012/10/23 00:28:43 UTC
svn commit: r1401112 - in /pig/branches/branch-0.11: ./
src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/
src/org/apache/pig/data/ src/org/apache/pig/impl/io/ test/
test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/
Author: jcoveney
Date: Mon Oct 22 22:28:42 2012
New Revision: 1401112
URL: http://svn.apache.org/viewvc?rev=1401112&view=rev
Log:
PIG-2975: TestTypedMap.testOrderBy failing with incorrect result (knoguchi via jcoveney)
Added:
pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java
Modified:
pig/branches/branch-0.11/CHANGES.txt
pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java
pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java
pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java
pig/branches/branch-0.11/test/unit-tests
Modified: pig/branches/branch-0.11/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/CHANGES.txt?rev=1401112&r1=1401111&r2=1401112&view=diff
==============================================================================
--- pig/branches/branch-0.11/CHANGES.txt (original)
+++ pig/branches/branch-0.11/CHANGES.txt Mon Oct 22 22:28:42 2012
@@ -308,9 +308,7 @@ OPTIMIZATIONS
BUG FIXES
-PIG-2950: Fix tiny documentation error in BagToString builtin (initialcontext via daijy)
-
-PIG-2967: Fix Glob_local test failure for Pig E2E Test Framework (sushantj via daijy)
+PIG-2975: TestTypedMap.testOrderBy failing with incorrect result (knoguchi via jcoveney)
PIG-1283: COUNT on null bag causes failure (analog.sony via jcoveney)
Modified: pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java?rev=1401112&r1=1401111&r2=1401112&view=diff
==============================================================================
--- pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java (original)
+++ pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java Mon Oct 22 22:28:42 2012
@@ -21,14 +21,11 @@ import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapred.JobConf;
-
-import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.BinInterSedes;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.io.NullableBytesWritable;
import org.apache.pig.impl.util.ObjectSerializer;
@@ -37,11 +34,11 @@ public class PigBytesRawComparator exten
private final Log mLog = LogFactory.getLog(getClass());
private boolean[] mAsc;
- private BytesWritable.Comparator mWrappedComp;
+ private WritableComparator mWrappedComp;
public PigBytesRawComparator() {
super(NullableBytesWritable.class);
- mWrappedComp = new BytesWritable.Comparator();
+ mWrappedComp = new BinInterSedes.BinInterSedesTupleRawComparator();
}
public void setConf(Configuration conf) {
@@ -63,6 +60,7 @@ public class PigBytesRawComparator exten
mAsc = new boolean[1];
mAsc[0] = true;
}
+ ((BinInterSedes.BinInterSedesTupleRawComparator)mWrappedComp).setConf(conf);
}
public Configuration getConf() {
@@ -70,17 +68,69 @@ public class PigBytesRawComparator exten
}
/**
- * Compare two NullableBytesWritables as raw bytes. If neither are null,
- * then IntWritable.compare() is used. If both are null then the indices
- * are compared. Otherwise the null one is defined to be less.
+ * Compare two NullableBytesWritables as raw bytes.
+ * If both are null, then the indices are compared.
+ * If neither are null
+ * and both are bytearrays, then direct Writable.compareBytes is used.
+ * For non-bytearrays, we use BinInterSedesTupleRawComparator.
+ * If either is null, null one is defined to be less.
*/
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
int rc = 0;
// If either are null, handle differently.
if (b1[s1] == 0 && b2[s2] == 0) {
- // Subtract 2, one for null byte and one for index byte
- rc = mWrappedComp.compare(b1, s1 + 1, l1 - 2, b2, s2 + 1, l2 - 2);
+ // Checking if both of them are ByteArrays.
+ // b1[s1+1] is always TUPLE_1 so skipping.
+ // b1[s1+2] contains the datatype followed by a datasize.
+ // No need to read the actual size since it is given from l1&l2.
+ // We just need to skip those bytes here.
+ // This shortcut is placed here for performances purposes.(PIG-2975)
+ boolean dataByteArraysCompare=true;
+ int offset1,offset2,length1,length2;
+ switch(b1[s1 + 2]) {
+ case BinInterSedes.TINYBYTEARRAY:
+ // skipping mNull, TUPLE_1, TINYBYTEARRAY(1byte), size(1byte)
+ offset1 = s1 + 4;
+ length1 = l1 - 4;
+ break;
+ case BinInterSedes.SMALLBYTEARRAY:
+ // skipping mNull, TUPLE_1, SMALLBYTEARRAY(1byte), size(2byte)
+ offset1 = s1 + 5;
+ length1 = l1 - 5;
+ break;
+ case BinInterSedes.BYTEARRAY:
+ // skipping mNull, TUPLE_1, BYTEARRAY(1byte), size(4byte)
+ offset1 = s1 + 7;
+ length1 = l1 - 7;
+ break;
+ default:
+ offset1 = length1 = 0;
+ dataByteArraysCompare = false;
+ }
+ switch(b2[s2 + 2]) {
+ case BinInterSedes.TINYBYTEARRAY:
+ offset2 = s2 + 4;
+ length2 = l2 - 4;
+ break;
+ case BinInterSedes.SMALLBYTEARRAY:
+ offset2 = s2 + 5;
+ length2 = l2 - 5;
+ break;
+ case BinInterSedes.BYTEARRAY:
+ offset2 = s2 + 7;
+ length2 = l2 - 7;
+ break;
+ default:
+ offset2 = length2 = 0;
+ dataByteArraysCompare=false;
+ }
+ if( dataByteArraysCompare ) {
+ rc = WritableComparator.compareBytes(b1, offset1, length1, b2, offset2, length2);
+ } else {
+ // Subtract 2, one for null byte and one for index byte
+ rc = mWrappedComp.compare(b1, s1 + 1, l1 - 2, b2, s2 + 1, l2 - 2);
+ }
} else {
// For sorting purposes two nulls are equal.
if (b1[s1] != 0 && b2[s2] != 0) rc = 0;
Modified: pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java?rev=1401112&r1=1401111&r2=1401112&view=diff
==============================================================================
--- pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java (original)
+++ pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java Mon Oct 22 22:28:42 2012
@@ -862,11 +862,9 @@ public class BinInterSedes implements In
if (type1 == type2) {
int basz1 = readSize(bb1, dt1);
int basz2 = readSize(bb2, dt2);
- byte[] ba1 = new byte[basz1];
- byte[] ba2 = new byte[basz2];
- bb1.get(ba1);
- bb2.get(ba2);
- rc = DataByteArray.compare(ba1, ba2);
+ rc = org.apache.hadoop.io.WritableComparator.compareBytes(
+ bb1.array(), bb1.position(), basz1,
+ bb2.array(), bb2.position(), basz2);
}
break;
}
Modified: pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java?rev=1401112&r1=1401111&r2=1401112&view=diff
==============================================================================
--- pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java (original)
+++ pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java Mon Oct 22 22:28:42 2012
@@ -25,28 +25,32 @@ import org.apache.pig.data.TupleFactory;
*
*/
public class NullableBytesWritable extends PigNullableWritable {
+ private static final TupleFactory mTupleFactory = TupleFactory.getInstance();
public NullableBytesWritable() {
- mValue = TupleFactory.getInstance().newTuple();
+ mValue = mTupleFactory.newTuple();
}
/**
* @param obj
*/
public NullableBytesWritable(Object obj) {
- mValue = TupleFactory.getInstance().newTuple();
- ((Tuple)mValue).append(obj);
+ mValue = mTupleFactory.newTuple(1);
+ try {
+ ((Tuple)mValue).set(0, obj);
+ } catch (ExecException e) {
+ throw new RuntimeException(e);
+ }
}
public Object getValueAsPigType() {
- if (isNull())
+ if (isNull()) {
return null;
- Object obj = null;
+ }
try {
- obj = ((Tuple)mValue).get(0);
+ return ((Tuple)mValue).get(0);
} catch (ExecException e) {
throw new RuntimeException(e);
}
- return obj;
}
}
Added: pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java?rev=1401112&view=auto
==============================================================================
--- pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java (added)
+++ pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java Mon Oct 22 22:28:42 2012
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer;
+
+import static junit.framework.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.util.Arrays;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.pig.data.BinInterSedes;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.impl.io.NullableBytesWritable;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+
+public class TestPigBytesRawComparator {
+ private static PigBytesRawComparator bytesRawComparator ;
+ private static ByteArrayOutputStream bas;
+
+
+ @BeforeClass
+ public static void setupBeforeClass() throws Exception {
+ bytesRawComparator = new PigBytesRawComparator();
+ bytesRawComparator.setConf(new JobConf());
+
+ //this bytesarray is used for holding serialized NullableBytesWritable
+ bas = new ByteArrayOutputStream(BinInterSedes.UNSIGNED_SHORT_MAX + 10);
+ }
+ @AfterClass
+ public static void setupAfterClass() throws Exception {
+ bytesRawComparator = null;
+ bas = null;
+ }
+
+ @Test
+ public void testBoolean() throws Exception {
+ assertTrue("boolean True and False considered equal",
+ compareTwoObjectsAsNullableBytesWritables(
+ new Boolean(true), new Boolean(false)) != 0);
+ }
+
+
+ @Test
+ public void testSingleInt0and1() throws Exception {
+ assertTrue("Integer 0 and Integer 1 considered equal",
+ compareTwoObjectsAsNullableBytesWritables(
+ new Integer(0), new Integer(1)) != 0);
+ }
+
+ @Test
+ public void testSingleInt() throws Exception {
+ assertTrue("Integer 5 and Integer 7 considered equal",
+ compareTwoObjectsAsNullableBytesWritables(
+ new Integer(5), new Integer(7)) != 0);
+ }
+
+ @Test
+ public void testSingleLong0and1() throws Exception {
+ assertTrue("Long 0 and Long 1 considered equal",
+ compareTwoObjectsAsNullableBytesWritables(
+ new Long(0), new Long(1)) != 0);
+ }
+
+ @Test
+ public void testSingleLong() throws Exception {
+ assertTrue("Long 5 and Long 7 considered equal",
+ compareTwoObjectsAsNullableBytesWritables(
+ new Long(5), new Long(7)) != 0);
+ }
+
+ @Test
+ public void testSingleByte() throws Exception {
+ assertTrue("Byte 5 and Byte 7 considered equal",
+ compareTwoObjectsAsNullableBytesWritables(
+ new Byte((byte)5), new Byte((byte)7)) != 0);
+ }
+
+ @Test
+ public void testSingleByteDiffByteArray() throws Exception {
+ assertTrue("'ab' and 'ac' considered equal",
+ compareTwoObjectsAsNullableBytesWritables(
+ new DataByteArray(new byte[] {'a','b'}),
+ new DataByteArray(new byte[] {'a','c'})) != 0);
+ assertTrue("'ab' and 'abc' considered equal",
+ compareTwoObjectsAsNullableBytesWritables(
+ new DataByteArray(new byte[] {'a','b'}),
+ new DataByteArray(new byte[] {'a','b','c'})) != 0);
+ assertTrue("'ab' and 'cb' considered equal",
+ compareTwoObjectsAsNullableBytesWritables(
+ new DataByteArray(new byte[] {'a','b'}),
+ new DataByteArray(new byte[] {'c','b'})) != 0);
+ assertTrue("'a' and 'b' considered equal",
+ compareTwoObjectsAsNullableBytesWritables(
+ new DataByteArray(new byte[] {'a'}),
+ new DataByteArray(new byte[] {'b'})) != 0);
+ }
+
+ @Test
+ public void testDifferentType() throws Exception {
+ assertTrue("Integer 9999 and Long 9999 considered equal",
+ compareTwoObjectsAsNullableBytesWritables(new Integer(9999), new Long(9999)) != 0 );
+ }
+
+ @Test
+ public void testByteArrayAlphabeticalOrderingByLength() throws Exception {
+ // making sure we order as
+ // '1'
+ // '2'
+ // '22'
+ // '222' and repeats...
+ // If compare includes the header, order suddenly changes when header
+ // changes from INTEGER_INSHORT to INTEGER.
+ // Here, we're making sure that doesn't happen.
+ //
+ for(int i = BinInterSedes.UNSIGNED_BYTE_MAX - 3 ;
+ i < BinInterSedes.UNSIGNED_BYTE_MAX + 3; i++){
+ DataByteArray dba1 = createRepeatedByteArray((byte)'2', i);
+ DataByteArray dba2 = createRepeatedByteArray((byte)'2', i + 1);
+ assertTrue("ByteArray order changed at UNSIGNED_BYTE_MAX boundary",
+ compareTwoObjectsAsNullableBytesWritables(dba1, dba2) < 0);
+ }
+ for(int i = BinInterSedes.UNSIGNED_SHORT_MAX -3 ;
+ i < BinInterSedes.UNSIGNED_SHORT_MAX + 3; i++){
+ DataByteArray dba1 = createRepeatedByteArray((byte)'2', i);
+ DataByteArray dba2 = createRepeatedByteArray((byte)'2', i + 1);
+ assertTrue("ByteArray order changed at UNSIGNED_SHORT_MAX boundary",
+ compareTwoObjectsAsNullableBytesWritables(dba1, dba2) < 0);
+ }
+ }
+
+ @Test
+ public void testLongByteArrays() throws Exception {
+ for(int length: new int [] {BinInterSedes.UNSIGNED_BYTE_MAX + 50,
+ BinInterSedes.UNSIGNED_SHORT_MAX + 50} ) {
+ // To make sure that offset & length are set correctly for
+ // BinInterSedes.SMALLBYTEARRAY and BinInterSedes.BYTEARRAY,
+ // create a long bytearray and compare with first or last byte changed
+
+ byte [] ba1 = new byte[length];
+ byte [] ba2 = new byte[length];
+ Arrays.fill(ba1, 0, length, (byte)'a');
+ Arrays.fill(ba2, 0, length, (byte)'a');
+ //changing only the last byte
+ ba2[length-1] = 'b';
+ assertTrue("ByteArray with length: " + length
+ + " compare failed with the last byte",
+ compareTwoObjectsAsNullableBytesWritables(
+ new DataByteArray(ba1), new DataByteArray(ba2)) != 0);
+ //setting back
+ ba2[length-1] = 'a';
+
+ //now changing the first byte
+ ba2[0] = 'b';
+ assertTrue("ByteArray with length: " + length
+ + " compare failed with the first byte",
+ compareTwoObjectsAsNullableBytesWritables(
+ new DataByteArray(ba1), new DataByteArray(ba2)) != 0);
+ }
+ }
+
+ private DataByteArray createRepeatedByteArray(byte c, int num) {
+ byte [] ba = new byte[num];
+ Arrays.fill(ba, 0, num, c);
+ return new DataByteArray(ba);
+ }
+
+ // Wrap the passed object with NullableBytesWritable and return the serialized
+ // form.
+ private byte [] serializeAsNullableBytesWritable(Object obj) throws Exception {
+ NullableBytesWritable nbw = new NullableBytesWritable(obj);
+ bas.reset();
+ DataOutput dout = new DataOutputStream(bas);
+ nbw.write(dout);
+ return bas.toByteArray();
+ }
+
+ // Take two objects wrapped by NullableBytesWritable and comare using
+ // PigBytesRawComparator
+ private int compareTwoObjectsAsNullableBytesWritables(Object obj1, Object obj2) throws Exception {
+ byte [] ba1 = serializeAsNullableBytesWritable(obj1);
+ byte [] ba2 = serializeAsNullableBytesWritable(obj2);
+ return bytesRawComparator.compare(ba1, 0, ba1.length, ba2, 0, ba2.length);
+ }
+}
Modified: pig/branches/branch-0.11/test/unit-tests
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/unit-tests?rev=1401112&r1=1401111&r2=1401112&view=diff
==============================================================================
--- pig/branches/branch-0.11/test/unit-tests (original)
+++ pig/branches/branch-0.11/test/unit-tests Mon Oct 22 22:28:42 2012
@@ -44,6 +44,7 @@
**/TestNull.java
**/TestPackage.java
**/TestParamSubPreproc.java
+**/TestPigBytesRawComparator.java
**/TestPhyOp.java
**/TestPigScriptParser.java
**/TestPigSplit.java