You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by kn...@apache.org on 2017/12/21 21:39:21 UTC
svn commit: r1818985 - in /pig/trunk: ./ src/org/apache/pig/data/
test/org/apache/pig/test/
Author: knoguchi
Date: Thu Dec 21 21:39:21 2017
New Revision: 1818985
URL: http://svn.apache.org/viewvc?rev=1818985&view=rev
Log:
PIG-5300: hashCode for Bag needs to be order independent (knoguchi)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/data/DataBag.java
pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java
pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java
pig/trunk/src/org/apache/pig/data/SingleTupleBag.java
pig/trunk/test/org/apache/pig/test/TestDataModel.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1818985&r1=1818984&r2=1818985&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Dec 21 21:39:21 2017
@@ -59,6 +59,7 @@ PIG-5251: Bump joda-time to 2.9.9 (dbist
OPTIMIZATIONS
BUG FIXES
+PIG-5300: hashCode for Bag needs to be order independent (knoguchi)
PIG-5318: Unit test failures on Pig on Spark with Spark 2.2 (nkollar via szita)
Modified: pig/trunk/src/org/apache/pig/data/DataBag.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/data/DataBag.java?rev=1818985&r1=1818984&r2=1818985&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/data/DataBag.java (original)
+++ pig/trunk/src/org/apache/pig/data/DataBag.java Thu Dec 21 21:39:21 2017
@@ -126,6 +126,15 @@ public interface DataBag extends Spillab
void clear();
/**
+ * Returns the hash code value for the bag. The hash code of a bag is
+ * defined to be the sum of the hash codes of each tuple.
+ * This ensures that b1.equals(b2) implies b1.hashCode() == b2.hashCode()
+ *
+ * @return the hash code value for this bag
+ */
+ int hashCode();
+
+ /**
* This is used by FuncEvalSpec.FakeDataBag.
* @param stale Set stale state.
*/
Modified: pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java?rev=1818985&r1=1818984&r2=1818985&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java (original)
+++ pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java Thu Dec 21 21:39:21 2017
@@ -309,6 +309,9 @@ public abstract class DefaultAbstractBag
@Override
public boolean equals(Object other) {
+ if( other == null ) {
+ return false;
+ }
return compareTo(other) == 0;
}
@@ -359,11 +362,10 @@ public abstract class DefaultAbstractBag
@Override
public int hashCode() {
- int hash = 1;
+ int hash = 0;
Iterator<Tuple> i = iterator();
while (i.hasNext()) {
- // Use 37 because we want a prime, and tuple uses 31.
- hash = 37 * hash + i.next().hashCode();
+ hash += i.next().hashCode();
}
return hash;
}
Modified: pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java?rev=1818985&r1=1818984&r2=1818985&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java (original)
+++ pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java Thu Dec 21 21:39:21 2017
@@ -198,11 +198,18 @@ public class NonSpillableDataBag impleme
*/
@Override
public boolean equals(Object obj) {
+ if( obj == null) {
+ return false;
+ }
return compareTo(obj) == 0;
}
public int hashCode() {
- return mContents.hashCode();
+ int hash = 0;
+ for( Tuple t : mContents ) {
+ hash += t.hashCode();
+ }
+ return hash;
}
@SuppressWarnings("unchecked")
Modified: pig/trunk/src/org/apache/pig/data/SingleTupleBag.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/data/SingleTupleBag.java?rev=1818985&r1=1818984&r2=1818985&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/data/SingleTupleBag.java (original)
+++ pig/trunk/src/org/apache/pig/data/SingleTupleBag.java Thu Dec 21 21:39:21 2017
@@ -156,22 +156,37 @@ public class SingleTupleBag implements D
}
}
- /* (non-Javadoc)
- * @see java.lang.Comparable#compareTo(java.lang.Object)
- */
@Override
- public int compareTo(Object o) {
- // TODO Auto-generated method stub
- return 0;
+ public int compareTo(Object other) {
+ if (this == other)
+ return 0;
+ if (other instanceof DataBag) {
+ DataBag bOther = (DataBag) other;
+ if (bOther.size() != 1) {
+ if ( 1 > bOther.size()) return 1;
+ else return -1;
+ }
+ Iterator<Tuple> iter = bOther.iterator();
+ return item.compareTo(iter.next());
+ } else {
+ return DataType.compare(this, other);
+ }
}
- public boolean equals(Object o){
- // TODO: match to compareTo if it is updated
- return true;
+ @Override
+ public boolean equals(Object other) {
+ if( other == null ) {
+ return false;
+ }
+ return compareTo(other) == 0;
}
+ @Override
public int hashCode() {
- return 42;
+ if( item != null ) {
+ return item.hashCode();
+ }
+ return 0;
}
class TBIterator implements Iterator<Tuple> {
Modified: pig/trunk/test/org/apache/pig/test/TestDataModel.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestDataModel.java?rev=1818985&r1=1818984&r2=1818985&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestDataModel.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestDataModel.java Thu Dec 21 21:39:21 2017
@@ -17,6 +17,7 @@ package org.apache.pig.test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@@ -42,7 +43,15 @@ import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
+import org.apache.pig.data.DefaultDataBag;
+import org.apache.pig.data.DistinctDataBag;
+import org.apache.pig.data.InternalCachedBag;
+import org.apache.pig.data.InternalDistinctBag;
import org.apache.pig.data.InternalMap;
+import org.apache.pig.data.InternalSortedBag;
+import org.apache.pig.data.NonSpillableDataBag;
+import org.apache.pig.data.SingleTupleBag;
+import org.apache.pig.data.SortedDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.junit.Test;
@@ -587,6 +596,77 @@ public class TestDataModel {
}
}
+ @Test
+ public void testBagHashCodeAndEquals() throws Exception {
+ DataBag [] ascBags = {new SortedDataBag(null), new DefaultDataBag(), new DistinctDataBag(), new InternalCachedBag(), new InternalDistinctBag(), new InternalSortedBag(), new NonSpillableDataBag()};
+ DataBag [] dscBags = {new SortedDataBag(null), new DefaultDataBag(), new DistinctDataBag(), new InternalCachedBag(), new InternalDistinctBag(), new InternalSortedBag(), new NonSpillableDataBag()};
+ Tuple input = giveMeOneOfEach();
+ TupleFactory tf = TupleFactory.getInstance();
+ for (int i = 0; i < ascBags.length; i++ ) {
+ for( int j = 0; j < input.size(); j++ ) {
+ ascBags[i].add(tf.newTuple(input.get(j)));
+ dscBags[i].add(tf.newTuple(input.get(input.size() - 1 - j)));
+ }
+ }
+ // making sure equals(null) does not throw NPE
+ for (DataBag bag : ascBags ) {
+ assertFalse(bag.equals(null));
+ }
+ // Comparing against each other including itself
+ for (DataBag bag1 : ascBags ) {
+ for (DataBag bag2 : ascBags ) {
+ assertEquals("Comparing hashcode for " + bag1.getClass() + " and " + bag2.getClass() + " failed", bag1.hashCode(), bag2.hashCode());
+ assertEquals("Comparing content of Bag for " + bag1.getClass() + " and " + bag2.getClass() + " failed", bag1, bag2);
+ }
+ }
+ // Comparing against each other again but now with different insertion
+ // order. (including from the same type of Bag class)
+ for (DataBag ascBag : ascBags) {
+ for (DataBag dscBag : dscBags) {
+ assertEquals("Comparing hashcode for " + ascBag.getClass() + " and " + dscBag.getClass() + " failed", ascBag.hashCode(), dscBag.hashCode());
+ assertEquals("Comparing content of Bag for " + ascBag.getClass() + " and " + dscBag.getClass() + " failed", ascBag, dscBag);
+ }
+ }
+ }
+
+ @Test
+ public void testBagHashCodeAndEqualsForSingleTuple() throws Exception {
+ TupleFactory tf = TupleFactory.getInstance();
+
+ // For each datatype, making sure bag hashcode and content match
+ for( Object input: giveMeOneOfEach() ) {
+ DataBag [] singleItemBags = {new SortedDataBag(null), new DefaultDataBag(), new DistinctDataBag(), new InternalCachedBag(), new InternalDistinctBag(), new InternalSortedBag(), new NonSpillableDataBag(), new SingleTupleBag(null)};
+ DataBag [] twoItemBags = {new SortedDataBag(null), new DefaultDataBag(), new DistinctDataBag(), new InternalCachedBag(), new InternalDistinctBag(), new InternalSortedBag(), new NonSpillableDataBag()};
+
+ for( DataBag singleItemBag : singleItemBags ) {
+ singleItemBag.add(tf.newTuple(input));
+ }
+
+ for( DataBag singleItemBag1 : singleItemBags ) {
+ for( DataBag singleItemBag2 : singleItemBags ) {
+ assertEquals("Comparing hashcode for " + singleItemBag1.getClass()
+ + " and " + singleItemBag2.getClass() + " failed",
+ singleItemBag1.hashCode(), singleItemBag2.hashCode());
+ assertEquals("Comparing content of Bag for " + singleItemBag1.getClass()
+ + " and " + singleItemBag2.getClass() + " failed",
+ singleItemBag1, singleItemBag2);
+ }
+ }
+
+ // making sure no two-items-bag would match with single item bag
+ for( DataBag twoItemBag : twoItemBags ) {
+ twoItemBag.add(tf.newTuple(input));
+ twoItemBag.add(tf.newTuple(new String("Unique string not to overlap with a tuple above")));
+ }
+
+ for( DataBag twoItemBag : twoItemBags ) {
+ for( DataBag singleItemBag : singleItemBags ) {
+ assertNotEquals(twoItemBag, singleItemBag);
+ }
+ }
+ }
+ }
+
private Tuple giveMeOneOfEach() throws Exception {
TupleFactory tf = TupleFactory.getInstance();