Posted to commits@hive.apache.org by mm...@apache.org on 2016/05/15 03:51:44 UTC

[3/3] hive git commit: HIVE-13682: EOFException with fast hashtable (Matt McCline, reviewed by Sergey Shelukhin)

HIVE-13682: EOFException with fast hashtable (Matt McCline, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4533d21b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4533d21b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4533d21b

Branch: refs/heads/master
Commit: 4533d21b0be487e1f11fcc95578a2ba103e72a64
Parents: fbeee62
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sat May 14 20:44:27 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sat May 14 20:44:27 2016 -0700

----------------------------------------------------------------------
 .../fast/VectorMapJoinFastBytesHashMap.java     |  12 +-
 .../VectorMapJoinFastBytesHashMultiSet.java     |  10 +-
 .../fast/VectorMapJoinFastBytesHashSet.java     |   5 +-
 .../fast/VectorMapJoinFastBytesHashTable.java   |  14 +-
 .../fast/VectorMapJoinFastLongHashMap.java      |  22 +-
 .../fast/VectorMapJoinFastLongHashMultiSet.java |  13 +-
 .../fast/VectorMapJoinFastLongHashSet.java      |  16 +-
 .../fast/VectorMapJoinFastLongHashTable.java    |  13 -
 .../fast/VectorMapJoinFastMultiKeyHashMap.java  |  21 +-
 .../VectorMapJoinFastMultiKeyHashMultiSet.java  |  25 +-
 .../fast/VectorMapJoinFastMultiKeyHashSet.java  |  26 +-
 .../fast/VectorMapJoinFastStringHashMap.java    |   4 +-
 .../VectorMapJoinFastStringHashMultiSet.java    |   4 +-
 .../fast/VectorMapJoinFastStringHashSet.java    |   4 +-
 .../fast/VectorMapJoinFastValueStore.java       | 187 ++++-
 .../ql/exec/vector/RandomRowObjectSource.java   | 388 ----------
 .../ql/exec/vector/TestVectorRowObject.java     |  34 +-
 .../hive/ql/exec/vector/TestVectorSerDeRow.java |   8 +-
 .../vector/mapjoin/fast/CheckFastHashTable.java | 721 +++++++++++++++++++
 .../mapjoin/fast/CommonFastHashTable.java       |  62 +-
 .../fast/TestVectorMapJoinFastBytesHashMap.java | 272 +++++++
 .../TestVectorMapJoinFastBytesHashMultiSet.java | 253 +++++++
 .../fast/TestVectorMapJoinFastBytesHashSet.java | 252 +++++++
 .../fast/TestVectorMapJoinFastLongHashMap.java  | 303 ++++----
 .../TestVectorMapJoinFastLongHashMultiSet.java  | 252 +++++++
 .../fast/TestVectorMapJoinFastLongHashSet.java  | 250 +++++++
 .../TestVectorMapJoinFastMultiKeyHashMap.java   | 231 ------
 .../hive/serde2/fast/RandomRowObjectSource.java | 423 +++++++++++
 .../fast/LazyBinaryDeserializeRead.java         |   2 +-
 .../apache/hadoop/hive/serde2/VerifyFast.java   | 123 ++--
 .../hive/serde2/binarysortable/MyTestClass.java |  86 +++
 .../binarysortable/TestBinarySortableFast.java  | 384 +++++++---
 .../hive/serde2/lazy/TestLazySimpleFast.java    | 270 ++++---
 .../serde2/lazybinary/TestLazyBinaryFast.java   | 285 ++++++--
 .../hadoop/hive/common/type/RandomTypeUtil.java |  95 +++
 35 files changed, 3852 insertions(+), 1218 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
index 0ff98bd..a4bc188 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
@@ -18,16 +18,23 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
 
+import java.io.IOException;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.JoinUtil;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hive.common.util.HashCodeUtil;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /*
- * An single byte array value hash map optimized for vector map join.
+ * A bytes key hash map optimized for vector map join.
+ *
+ * This is the abstract base for the multi-key and string bytes key hash map implementations.
  */
 public abstract class VectorMapJoinFastBytesHashMap
         extends VectorMapJoinFastBytesHashTable
@@ -37,6 +44,8 @@ public abstract class VectorMapJoinFastBytesHashMap
 
   private VectorMapJoinFastValueStore valueStore;
 
+  protected BytesWritable testValueBytesWritable;
+
   @Override
   public VectorMapJoinHashMapResult createHashMapResult() {
     return new VectorMapJoinFastValueStore.HashMapResult();
@@ -56,7 +65,6 @@ public abstract class VectorMapJoinFastBytesHashMap
       slotTriples[tripleIndex + 1] = hashCode;
       slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength);
       // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
-      keysAssigned++;
     } else {
       // Add another value.
       // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
index 5d8ed2d..aaf3497 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
@@ -18,16 +18,23 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
 
+import java.io.IOException;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.JoinUtil;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hive.common.util.HashCodeUtil;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /*
- * An single byte array value hash multi-set optimized for vector map join.
+ * A bytes key hash multi-set optimized for vector map join.
+ *
+ * This is the abstract base for the multi-key and string bytes key hash multi-set implementations.
  */
 public abstract class VectorMapJoinFastBytesHashMultiSet
         extends VectorMapJoinFastBytesHashTable
@@ -51,7 +58,6 @@ public abstract class VectorMapJoinFastBytesHashMultiSet
       slotTriples[tripleIndex + 1] = hashCode;
       slotTriples[tripleIndex + 2] = 1;    // Count.
       // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
-      keysAssigned++;
     } else {
       // Add another value.
       // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
index 990a2e5..841183e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
@@ -27,7 +27,9 @@ import org.apache.hadoop.io.BytesWritable;
 import org.apache.hive.common.util.HashCodeUtil;
 
 /*
- * An single byte array value hash multi-set optimized for vector map join.
+ * A bytes key hash set optimized for vector map join.
+ *
+ * This is the abstract base for the multi-key and string bytes key hash set implementations.
  */
 public abstract class VectorMapJoinFastBytesHashSet
         extends VectorMapJoinFastBytesHashTable
@@ -50,7 +52,6 @@ public abstract class VectorMapJoinFastBytesHashSet
       slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength);
       slotTriples[tripleIndex + 1] = hashCode;
       slotTriples[tripleIndex + 2] = 1;    // Existence
-      keysAssigned++;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
index 6b536f0..d6e107b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
@@ -42,8 +42,7 @@ public abstract class VectorMapJoinFastBytesHashTable
 
   protected VectorMapJoinFastKeyStore keyStore;
 
-  private BytesWritable testKeyBytesWritable;
-  private BytesWritable testValueBytesWritable;
+  protected BytesWritable testKeyBytesWritable;
 
   @Override
   public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException {
@@ -53,17 +52,6 @@ public abstract class VectorMapJoinFastBytesHashTable
     add(keyBytes, 0, keyLength, currentValue);
   }
 
-  @VisibleForTesting
-  public void putRow(byte[] currentKey, byte[] currentValue) throws HiveException, IOException {
-    if (testKeyBytesWritable == null) {
-      testKeyBytesWritable = new BytesWritable();
-      testValueBytesWritable = new BytesWritable();
-    }
-    testKeyBytesWritable.set(currentKey, 0, currentKey.length);
-    testValueBytesWritable.set(currentValue, 0, currentValue.length);
-    putRow(testKeyBytesWritable, testValueBytesWritable);
-  }
-
   protected abstract void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength,
           long hashCode, boolean isNewKey, BytesWritable currentValue);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
index 1384fc9..cd51d0d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
@@ -18,17 +18,22 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
 
+import java.io.IOException;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.JoinUtil;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hive.common.util.HashCodeUtil;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /*
- * An single long value map optimized for vector map join.
+ * A single LONG key hash map optimized for vector map join.
  */
 public class VectorMapJoinFastLongHashMap
              extends VectorMapJoinFastLongHashTable
@@ -38,11 +43,26 @@ public class VectorMapJoinFastLongHashMap
 
   protected VectorMapJoinFastValueStore valueStore;
 
+  private BytesWritable testValueBytesWritable;
+
   @Override
   public VectorMapJoinHashMapResult createHashMapResult() {
     return new VectorMapJoinFastValueStore.HashMapResult();
   }
 
+  /*
+   * A Unit Test convenience method for putting key and value into the hash table using the
+   * actual types.
+   */
+  @VisibleForTesting
+  public void testPutRow(long currentKey, byte[] currentValue) throws HiveException, IOException {
+    if (testValueBytesWritable == null) {
+      testValueBytesWritable = new BytesWritable();
+    }
+    testValueBytesWritable.set(currentValue, 0, currentValue.length);
+    add(currentKey, testValueBytesWritable);
+  }
+
   @Override
   public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) {
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
index 94bf706..032233a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
@@ -31,8 +31,10 @@ import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hive.common.util.HashCodeUtil;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /*
- * An single long value multi-set optimized for vector map join.
+ * A single LONG key hash multi-set optimized for vector map join.
  */
 public class VectorMapJoinFastLongHashMultiSet
              extends VectorMapJoinFastLongHashTable
@@ -45,6 +47,15 @@ public class VectorMapJoinFastLongHashMultiSet
     return new VectorMapJoinFastHashMultiSet.HashMultiSetResult();
   }
 
+  /*
+   * A Unit Test convenience method for putting the key into the hash table using the
+   * actual type.
+   */
+  @VisibleForTesting
+  public void testPutRow(long currentKey) throws HiveException, IOException {
+    add(currentKey, null);
+  }
+
   @Override
   public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) {
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
index 2cbc548..21701d4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
@@ -18,18 +18,23 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
 
+import java.io.IOException;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.JoinUtil;
 import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashSet;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hive.common.util.HashCodeUtil;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /*
- * An single long value multi-set optimized for vector map join.
+ * A single LONG key hash set optimized for vector map join.
  */
 public class VectorMapJoinFastLongHashSet
              extends VectorMapJoinFastLongHashTable
@@ -42,6 +47,15 @@ public class VectorMapJoinFastLongHashSet
     return new VectorMapJoinFastHashSet.HashSetResult();
   }
 
+  /*
+   * A Unit Test convenience method for putting the key into the hash table using the
+   * actual type.
+   */
+  @VisibleForTesting
+  public void testPutRow(long currentKey) throws HiveException, IOException {
+    add(currentKey, null);
+  }
+
   @Override
   public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) {
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index f37f056..0a502e0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -58,8 +58,6 @@ public abstract class VectorMapJoinFastLongHashTable
   private long min;
   private long max;
 
-  private BytesWritable testValueBytesWritable;
-
   @Override
   public boolean useMinMax() {
     return useMinMax;
@@ -90,17 +88,6 @@ public abstract class VectorMapJoinFastLongHashTable
     add(key, currentValue);
   }
 
-
-  @VisibleForTesting
-  public void putRow(long currentKey, byte[] currentValue) throws HiveException, IOException {
-    if (testValueBytesWritable == null) {
-      testValueBytesWritable = new BytesWritable();
-    }
-    testValueBytesWritable.set(currentValue, 0, currentValue.length);
-    add(currentKey, testValueBytesWritable);
-  }
-
-
   protected abstract void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue);
 
   public void add(long key, BytesWritable currentValue) {

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
index 9a9fb8d..cee3b3b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
@@ -18,17 +18,34 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
 
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BytesWritable;
+
 import com.google.common.annotations.VisibleForTesting;
 
 /*
  * An multi-key value hash map optimized for vector map join.
+ *
+ * The key is stored as the provided bytes (uninterpreted).
  */
 public class VectorMapJoinFastMultiKeyHashMap
         extends VectorMapJoinFastBytesHashMap {
 
+  /*
+   * A Unit Test convenience method for putting key and value into the hash table using the
+   * actual types.
+   */
   @VisibleForTesting
-  public VectorMapJoinFastMultiKeyHashMap(int initialCapacity, float loadFactor, int wbSize) {
-    this(false, initialCapacity, loadFactor, wbSize);
+  public void testPutRow(byte[] currentKey, byte[] currentValue) throws HiveException, IOException {
+    if (testKeyBytesWritable == null) {
+      testKeyBytesWritable = new BytesWritable();
+      testValueBytesWritable = new BytesWritable();
+    }
+    testKeyBytesWritable.set(currentKey, 0, currentKey.length);
+    testValueBytesWritable.set(currentValue, 0, currentValue.length);
+    putRow(testKeyBytesWritable, testValueBytesWritable);
   }
 
   public VectorMapJoinFastMultiKeyHashMap(
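
The testPutRow methods added above (and in the other fast hash table subclasses below) share one small pattern: the production putRow entry point only accepts BytesWritable, so each class lazily creates and reuses writables to wrap the raw arrays a unit test hands in. A minimal sketch of that pattern, assuming only Hadoop's BytesWritable and Guava's @VisibleForTesting; the class below is illustrative and not part of Hive:

    import org.apache.hadoop.io.BytesWritable;

    import com.google.common.annotations.VisibleForTesting;

    // Illustrative sketch of the test-only putRow wrappers introduced by this patch.
    abstract class ByteKeyedHashTableSketch {

      // Production path: callers supply already-populated writables.
      public abstract void putRow(BytesWritable key, BytesWritable value);

      // Reused across calls so tests do not allocate a writable per row.
      private BytesWritable testKey;
      private BytesWritable testValue;

      // Unit-test convenience: wrap raw byte arrays and delegate to putRow.
      @VisibleForTesting
      public void testPutRow(byte[] key, byte[] value) {
        if (testKey == null) {
          testKey = new BytesWritable();
          testValue = new BytesWritable();
        }
        testKey.set(key, 0, key.length);
        testValue.set(value, 0, value.length);
        putRow(testKey, testValue);
      }
    }

The LONG-keyed variants only need the value-side wrapper, since their key is already a primitive long, and the multi-set and set variants pass null for the value.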

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
index a8744a5..ff82ac4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
@@ -18,15 +18,38 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
 
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.google.common.annotations.VisibleForTesting;
+
 /*
- * An multi-key value hash multi-set optimized for vector map join.
+ * A multi-key hash multi-set optimized for vector map join.
+ *
+ * The key is stored as the provided bytes (uninterpreted).
  */
 public class VectorMapJoinFastMultiKeyHashMultiSet
         extends VectorMapJoinFastBytesHashMultiSet {
 
+  /*
+   * A Unit Test convenience method for putting the key into the hash table using the
+   * actual type.
+   */
+  @VisibleForTesting
+  public void testPutRow(byte[] currentKey) throws HiveException, IOException {
+    if (testKeyBytesWritable == null) {
+      testKeyBytesWritable = new BytesWritable();
+    }
+    testKeyBytesWritable.set(currentKey, 0, currentKey.length);
+    putRow(testKeyBytesWritable, null);
+  }
+
   public VectorMapJoinFastMultiKeyHashMultiSet(
         boolean isOuterJoin,
         int initialCapacity, float loadFactor, int writeBuffersSize) {
     super(initialCapacity, loadFactor, writeBuffersSize);
   }
+
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
index a8048e5..de0666d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
@@ -18,15 +18,39 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
 
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.google.common.annotations.VisibleForTesting;
+
 /*
- * An multi-key value hash set optimized for vector map join.
+ * A multi-key hash set optimized for vector map join.
+ *
+ * The key is stored as the provided bytes (uninterpreted).
  */
 public class VectorMapJoinFastMultiKeyHashSet
         extends VectorMapJoinFastBytesHashSet {
 
+  /*
+   * A Unit Test convenience method for putting the key into the hash table using the
+   * actual type.
+   */
+  @VisibleForTesting
+  public void testPutRow(byte[] currentKey) throws HiveException, IOException {
+    if (testKeyBytesWritable == null) {
+      testKeyBytesWritable = new BytesWritable();
+    }
+    testKeyBytesWritable.set(currentKey, 0, currentKey.length);
+    putRow(testKeyBytesWritable, null);
+  }
+
   public VectorMapJoinFastMultiKeyHashSet(
         boolean isOuterJoin,
         int initialCapacity, float loadFactor, int writeBuffersSize) {
     super(initialCapacity, loadFactor, writeBuffersSize);
   }
+
+
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
index 6f181b2..35af1d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
@@ -24,7 +24,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.io.BytesWritable;
 
 /*
- * An single byte array value hash map optimized for vector map join.
+ * A single STRING key hash map optimized for vector map join.
+ *
+ * The key will be deserialized and just the bytes will be stored.
  */
 public class VectorMapJoinFastStringHashMap extends VectorMapJoinFastBytesHashMap {
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
index 9653b71..36120b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
@@ -24,7 +24,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.io.BytesWritable;
 
 /*
- * An single byte array value hash map optimized for vector map join.
+ * A single STRING key hash multi-set optimized for vector map join.
+ *
+ * The key will be deserialized and just the bytes will be stored.
  */
 public class VectorMapJoinFastStringHashMultiSet extends VectorMapJoinFastBytesHashMultiSet {
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
index 6419a0b..2ed6ab3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
@@ -24,7 +24,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.io.BytesWritable;
 
 /*
- * An single byte array value hash map optimized for vector map join.
+ * A single STRING key hash set optimized for vector map join.
+ *
+ * The key will be deserialized and just the bytes will be stored.
  */
 public class VectorMapJoinFastStringHashSet extends VectorMapJoinFastBytesHashSet {
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
index 570a747..f96e32b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
@@ -23,7 +23,9 @@ import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
 import org.apache.hadoop.hive.serde2.WriteBuffers;
 import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef;
-import org.apache.hadoop.hive.serde2.WriteBuffers.Position;;
+import org.apache.hadoop.hive.serde2.WriteBuffers.Position;
+
+import com.google.common.base.Preconditions;
 
 
 // Supports random access.
@@ -142,7 +144,6 @@ public class VectorMapJoinFastValueStore {
     }
 
     public void set(VectorMapJoinFastValueStore valueStore, long valueRefWord) {
-      // LOG.debug("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord));
 
       this.valueStore = valueStore;
       this.valueRefWord = valueRefWord;
@@ -217,6 +218,10 @@ public class VectorMapJoinFastValueStore {
 
       if (readIndex == 0) {
         /*
+         * Positioned to first.
+         */
+
+        /*
          * Extract information from reference word from slot table.
          */
         absoluteValueOffset =
@@ -226,19 +231,32 @@ public class VectorMapJoinFastValueStore {
         valueStore.writeBuffers.setReadPoint(absoluteValueOffset, readPos);
 
         if (isSingleRow) {
+          /*
+           * One element.
+           */
           isNextEof = true;
 
           valueLength =
               (int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift);
           boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn);
           if (!isValueLengthSmall) {
-            // And, if current value is big we must read it.
+
+            // {Big Value Len} {Big Value Bytes}
             valueLength = valueStore.writeBuffers.readVInt(readPos);
+          } else {
+
+            // {Small Value Bytes}
+            // (use small length from valueWordRef)
           }
         } else {
+          /*
+           * First of Multiple elements.
+           */
           isNextEof = false;
 
-          // 2nd and beyond records have a relative offset word at the beginning.
+          /*
+           * Read the relative offset word at the beginning of 2nd and beyond records.
+           */
           long relativeOffsetWord = valueStore.writeBuffers.readVLong(readPos);
 
           long relativeOffset =
@@ -246,25 +264,31 @@ public class VectorMapJoinFastValueStore {
 
           nextAbsoluteValueOffset = absoluteValueOffset - relativeOffset;
 
+          valueLength =
+              (int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift);
+          boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn);
+
+          /*
+           * Optionally, read current value's big length.  {Big Value Len} {Big Value Bytes}
+           * Since this is the first record, the valueRefWord directs us.
+           */
+          if (!isValueLengthSmall) {
+            valueLength = valueStore.writeBuffers.readVInt(readPos);
+          }
+
           isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0);
           isNextValueLengthSmall =
               ((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0);
-        }
 
-        valueLength =
-            (int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift);
-        boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn);
-        if (!isValueLengthSmall) {
-          // And, if current value is big we must read it.
-          valueLength = valueStore.writeBuffers.readVInt(readPos);
-        }
-
-        // 2nd and beyond have the next value's small length in the current record.
-        if (isNextValueLengthSmall) {
-          nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos);
-        } else {
-          nextSmallValueLength = -1;
-        }
+          /*
+           * Optionally, the next value's small length could be a 2nd integer...
+           */
+          if (isNextValueLengthSmall) {
+            nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos);
+          } else {
+            nextSmallValueLength = -1;
+          }
+        }
 
       } else {
         if (isNextEof) {
@@ -277,24 +301,37 @@ public class VectorMapJoinFastValueStore {
         valueStore.writeBuffers.setReadPoint(absoluteValueOffset, readPos);
 
         if (isNextLast) {
+          /*
+           * No relativeOffsetWord in last value.  (This was the first value written.)
+           */
           isNextEof = true;
 
           if (isNextValueLengthSmall) {
+
+            // {Small Value Bytes}
             valueLength = nextSmallValueLength;
           } else {
-            valueLength = (int) valueStore.writeBuffers.readVLong(readPos);
+
+            // {Big Value Len} {Big Value Bytes}
+            valueLength = valueStore.writeBuffers.readVInt(readPos);
           }
         } else {
+          /*
+           * {Rel Offset Word} [Big Value Len] [Next Value Small Len] {Value Bytes}
+           *
+           * 2nd and beyond records have a relative offset word at the beginning.
+           */
           isNextEof = false;
 
-          // 2nd and beyond records have a relative offset word at the beginning.
           long relativeOffsetWord = valueStore.writeBuffers.readVLong(readPos);
 
-          // Read current value's big length now, if necessary.
+          /*
+           * Optionally, read current value's big length.  {Big Value Len} {Big Value Bytes}
+           */
           if (isNextValueLengthSmall) {
             valueLength = nextSmallValueLength;
           } else {
-            valueLength = (int) valueStore.writeBuffers.readVLong(readPos);
+            valueLength = valueStore.writeBuffers.readVInt(readPos);
           }
 
           long relativeOffset =
@@ -305,9 +342,13 @@ public class VectorMapJoinFastValueStore {
           isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0);
           isNextValueLengthSmall =
               ((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0);
+
+          /*
+           * Optionally, the next value's small length could be a 2nd integer in the value's
+           * information.
+           */
           if (isNextValueLengthSmall) {
-            // TODO: Write readVInt
-            nextSmallValueLength = (int) valueStore.writeBuffers.readVLong(readPos);
+            nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos);
           } else {
             nextSmallValueLength = -1;
           }
@@ -396,6 +437,51 @@ public class VectorMapJoinFastValueStore {
     private static final long flagOnMask = 1L << bitShift;
   }
 
+  private static String valueRefWordToString(long valueRef) {
+    StringBuilder sb = new StringBuilder();
+
+    sb.append(Long.toHexString(valueRef));
+    sb.append(", ");
+    if ((valueRef & IsInvalidFlag.flagOnMask) != 0) {
+      sb.append("(Invalid optimized hash table reference), ");
+    }
+    /*
+     * Extract information.
+     */
+    long absoluteValueOffset =
+        (valueRef & AbsoluteValueOffset.bitMask);
+    int smallValueLength =
+        (int) ((valueRef & SmallValueLength.bitMask) >> SmallValueLength.bitShift);
+    boolean isValueLengthSmall = (smallValueLength != SmallValueLength.allBitsOn);
+    int cappedCount =
+        (int) ((valueRef & CappedCount.bitMask) >> CappedCount.bitShift);
+    boolean isValueLast =
+        ((valueRef & IsLastFlag.flagOnMask) != 0);
+
+    sb.append("absoluteValueOffset ");
+    sb.append(absoluteValueOffset);
+    sb.append(" (");
+    sb.append(Long.toHexString(absoluteValueOffset));
+    sb.append("), ");
+
+    if (isValueLengthSmall) {
+      sb.append("smallValueLength ");
+      sb.append(smallValueLength);
+      sb.append(", ");
+    } else {
+      sb.append("isValueLengthSmall = false, ");
+    }
+
+    sb.append("cappedCount ");
+    sb.append(cappedCount);
+    sb.append(", ");
+
+    sb.append("isValueLast ");
+    sb.append(isValueLast);
+
+    return sb.toString();
+  }
+
   /**
    * Relative Offset Word stored at the beginning of all but the last value that has a
    * relative offset and 2 flags.
@@ -431,6 +517,33 @@ public class VectorMapJoinFastValueStore {
     private static final long bitMask = allBitsOn << bitShift;
   }
 
+  private static String relativeOffsetWordToString(long relativeOffsetWord) {
+    StringBuilder sb = new StringBuilder();
+
+    sb.append(Long.toHexString(relativeOffsetWord));
+    sb.append(", ");
+
+    long nextRelativeOffset =
+        (relativeOffsetWord & NextRelativeValueOffset.bitMask) >> NextRelativeValueOffset.bitShift;
+    sb.append("nextRelativeOffset ");
+    sb.append(nextRelativeOffset);
+    sb.append(" (");
+    sb.append(Long.toHexString(nextRelativeOffset));
+    sb.append("), ");
+
+    boolean isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0);
+    sb.append("isNextLast ");
+    sb.append(isNextLast);
+    sb.append(", ");
+
+    boolean isNextValueLengthSmall =
+        ((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0);
+    sb.append("isNextValueLengthSmall ");
+    sb.append(isNextValueLengthSmall);
+
+    return sb.toString();
+  }
+
   public long addFirst(byte[] valueBytes, int valueStart, int valueLength) {
 
     // First value is written without: next relative offset, next value length, is next value last
@@ -473,8 +586,6 @@ public class VectorMapJoinFastValueStore {
       valueRefWord |= SmallValueLength.allBitsOnBitShifted;
     }
 
-    // LOG.debug("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord));
-
     // The lower bits are the absolute value offset.
     valueRefWord |= newAbsoluteOffset;
 
@@ -499,8 +610,6 @@ public class VectorMapJoinFastValueStore {
     boolean isOldValueLast =
         ((oldValueRef & IsLastFlag.flagOnMask) != 0);
 
-    // LOG.debug("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef));
-
     /*
      * Write information about the old value (which becomes our next) at the beginning
      * of our new value.
@@ -523,12 +632,6 @@ public class VectorMapJoinFastValueStore {
 
     writeBuffers.writeVLong(relativeOffsetWord);
 
-    // When the next value is small it was not recorded with the old (i.e. next) value and we
-    // have to remember it.
-    if (isOldValueLengthSmall) {
-      writeBuffers.writeVInt(oldSmallValueLength);
-    }
-
     // Now, we have written all information about the next value, work on the *new* value.
 
     long newValueRef = ((long) newCappedCount) << CappedCount.bitShift;
@@ -536,18 +639,28 @@ public class VectorMapJoinFastValueStore {
     if (!isNewValueSmall) {
       // Use magic value to indicating we are writing the big value length.
       newValueRef |= ((long) SmallValueLength.allBitsOn << SmallValueLength.bitShift);
+      Preconditions.checkState(
+          (int) ((newValueRef & SmallValueLength.bitMask) >> SmallValueLength.bitShift) ==
+              SmallValueLength.allBitsOn);
       writeBuffers.writeVInt(valueLength);
+
     } else {
       // Caller must remember small value length.
       newValueRef |= ((long) valueLength) << SmallValueLength.bitShift;
     }
+
+    // When the next value is small it was not recorded with the old (i.e. next) value and we
+    // have to remember it.
+    if (isOldValueLengthSmall) {
+
+      writeBuffers.writeVInt(oldSmallValueLength);
+    }
+
     writeBuffers.write(valueBytes, valueStart, valueLength);
 
     // The lower bits are the absolute value offset.
     newValueRef |=  newAbsoluteOffset;
 
-    // LOG.debug("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef));
-
     return newValueRef;
   }
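
The VectorMapJoinFastValueStore changes above carry the actual EOFException fix: the reader in HashMapResult now consumes the optional fields of a chained value record in the same order addMore writes them -- {Rel Offset Word} [Big Value Len] [Next Value Small Len] {Value Bytes} -- and the length fields are read back with readVInt to mirror the writeVInt calls. A standalone sketch of a matched writer/reader for that layout, using plain Java streams and Hadoop's WritableUtils instead of Hive's WriteBuffers (the class, method names, and boolean parameters are illustrative; the real code derives those flags from the value reference word and the relative offset word shown above):

    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.hadoop.io.WritableUtils;

    // Illustrative sketch only; not the actual WriteBuffers-based implementation.
    final class ChainedValueRecordSketch {

      static void write(DataOutputStream out, long relativeOffsetWord,
          boolean valueLengthIsBig, int valueLength,
          boolean nextValueLengthIsSmall, int nextSmallValueLength,
          byte[] valueBytes) throws IOException {
        WritableUtils.writeVLong(out, relativeOffsetWord);     // {Rel Offset Word}
        if (valueLengthIsBig) {
          WritableUtils.writeVInt(out, valueLength);           // [Big Value Len]
        }
        if (nextValueLengthIsSmall) {
          WritableUtils.writeVInt(out, nextSmallValueLength);  // [Next Value Small Len]
        }
        out.write(valueBytes, 0, valueLength);                 // {Value Bytes}
      }

      static byte[] read(DataInputStream in,
          boolean valueLengthIsBig, int smallValueLength,
          boolean nextValueLengthIsSmall) throws IOException {
        WritableUtils.readVLong(in);                           // {Rel Offset Word}, consumed first
        int valueLength = valueLengthIsBig
            ? WritableUtils.readVInt(in)                       // mirror writeVInt with readVInt
            : smallValueLength;                                 // small length comes from the reference word
        if (nextValueLengthIsSmall) {
          WritableUtils.readVInt(in);                          // [Next Value Small Len]; the real code keeps this for the next record
        }
        byte[] valueBytes = new byte[valueLength];
        in.readFully(valueBytes);                              // {Value Bytes}
        return valueBytes;
      }
    }

If the reader consumes those optional fields in a different order than the writer emitted them, as the pre-patch code did, the trailing read of the value bytes starts at the wrong offset and can run past the written data, which is the kind of failure the HIVE-13682 summary describes.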
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java
deleted file mode 100644
index 2d4baa0..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java
+++ /dev/null
@@ -1,388 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-
-import junit.framework.TestCase;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
-import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.common.type.RandomTypeUtil;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hive.common.util.DateUtils;
-
-/**
- * Generate object inspector and random row object[].
- */
-public class RandomRowObjectSource {
-
-  private Random r;
-
-  private int columnCount;
-
-  private List<String> typeNames;
-
-  private PrimitiveCategory[] primitiveCategories;
-
-  private PrimitiveTypeInfo[] primitiveTypeInfos;
-
-  private List<ObjectInspector> primitiveObjectInspectorList;
-
-  private StructObjectInspector rowStructObjectInspector;
-
-  public List<String> typeNames() {
-    return typeNames;
-  }
-
-  public PrimitiveCategory[] primitiveCategories() {
-    return primitiveCategories;
-  }
-
-  public PrimitiveTypeInfo[] primitiveTypeInfos() {
-    return primitiveTypeInfos;
-  }
-
-  public StructObjectInspector rowStructObjectInspector() {
-    return rowStructObjectInspector;
-  }
-
-  public void init(Random r) {
-    this.r = r;
-    chooseSchema();
-  }
-
-  private static String[] possibleHiveTypeNames = {
-      "boolean",
-      "tinyint",
-      "smallint",
-      "int",
-      "bigint",
-      "date",
-      "float",
-      "double",
-      "string",
-      "char",
-      "varchar",
-      "binary",
-      "date",
-      "timestamp",
-      serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME,
-      serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME,
-      "decimal"
-  };
-
-  private void chooseSchema() {
-    columnCount = 1 + r.nextInt(20);
-    typeNames = new ArrayList<String>(columnCount);
-    primitiveCategories = new PrimitiveCategory[columnCount];
-    primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
-    primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
-    List<String> columnNames = new ArrayList<String>(columnCount);
-    for (int c = 0; c < columnCount; c++) {
-      columnNames.add(String.format("col%d", c));
-      int typeNum = r.nextInt(possibleHiveTypeNames.length);
-      String typeName = possibleHiveTypeNames[typeNum];
-      if (typeName.equals("char")) {
-        int maxLength = 1 + r.nextInt(100);
-        typeName = String.format("char(%d)", maxLength);
-      } else if (typeName.equals("varchar")) {
-        int maxLength = 1 + r.nextInt(100);
-        typeName = String.format("varchar(%d)", maxLength);
-      } else if (typeName.equals("decimal")) {
-        typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
-      }
-      PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
-      primitiveTypeInfos[c] = primitiveTypeInfo;
-      PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
-      primitiveCategories[c] = primitiveCategory;
-      primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
-      typeNames.add(typeName);
-    }
-    rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
-  }
-
-  public Object[][] randomRows(int n) {
-    Object[][] result = new Object[n][];
-    for (int i = 0; i < n; i++) {
-      result[i] = randomRow();
-    }
-    return result;
-  }
-
-  public Object[] randomRow() {
-    Object row[] = new Object[columnCount];
-    for (int c = 0; c < columnCount; c++) {
-      Object object = randomObject(c);
-      if (object == null) {
-        throw new Error("Unexpected null for column " + c);
-      }
-      row[c] = getWritableObject(c, object);
-      if (row[c] == null) {
-        throw new Error("Unexpected null for writable for column " + c);
-      }
-    }
-    return row;
-  }
-
-  public Object getWritableObject(int column, Object object) {
-    ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
-    PrimitiveCategory primitiveCategory = primitiveCategories[column];
-    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
-    switch (primitiveCategory) {
-    case BOOLEAN:
-      return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
-    case BYTE:
-      return ((WritableByteObjectInspector) objectInspector).create((byte) object);
-    case SHORT:
-      return ((WritableShortObjectInspector) objectInspector).create((short) object);
-    case INT:
-      return ((WritableIntObjectInspector) objectInspector).create((int) object);
-    case LONG:
-      return ((WritableLongObjectInspector) objectInspector).create((long) object);
-    case DATE:
-      return ((WritableDateObjectInspector) objectInspector).create((Date) object);
-    case FLOAT:
-      return ((WritableFloatObjectInspector) objectInspector).create((float) object);
-    case DOUBLE:
-      return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
-    case STRING:
-      return ((WritableStringObjectInspector) objectInspector).create((String) object);
-    case CHAR:
-      {
-        WritableHiveCharObjectInspector writableCharObjectInspector = 
-                new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo);
-        return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1));
-      }
-    case VARCHAR:
-      {
-        WritableHiveVarcharObjectInspector writableVarcharObjectInspector = 
-                new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo);
-        return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1));
-      }
-    case BINARY:
-      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY);
-    case TIMESTAMP:
-      return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0));
-    case INTERVAL_YEAR_MONTH:
-      return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0));
-    case INTERVAL_DAY_TIME:
-      return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0));
-    case DECIMAL:
-      {
-        WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
-                new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
-        return writableDecimalObjectInspector.create(HiveDecimal.ZERO);
-      }
-    default:
-      throw new Error("Unknown primitive category " + primitiveCategory);
-    }
-  }
-
-  public Object randomObject(int column) {
-    PrimitiveCategory primitiveCategory = primitiveCategories[column];
-    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
-    switch (primitiveCategory) {
-    case BOOLEAN:
-      return Boolean.valueOf(r.nextInt(1) == 1);
-    case BYTE:
-      return Byte.valueOf((byte) r.nextInt());
-    case SHORT:
-      return Short.valueOf((short) r.nextInt());
-    case INT:
-      return Integer.valueOf(r.nextInt());
-    case LONG:
-      return Long.valueOf(r.nextLong());
-    case DATE:
-      return getRandDate(r);
-    case FLOAT:
-      return Float.valueOf(r.nextFloat() * 10 - 5);
-    case DOUBLE:
-      return Double.valueOf(r.nextDouble() * 10 - 5);
-    case STRING:
-      return getRandString(r);
-    case CHAR:
-      return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo);
-    case VARCHAR:
-      return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo);
-    case BINARY:
-      return getRandBinary(r, 1 + r.nextInt(100));
-    case TIMESTAMP:
-      return RandomTypeUtil.getRandTimestamp(r);
-    case INTERVAL_YEAR_MONTH:
-      return getRandIntervalYearMonth(r);
-    case INTERVAL_DAY_TIME:
-      return getRandIntervalDayTime(r);
-    case DECIMAL:
-      return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
-    default:
-      throw new Error("Unknown primitive category " + primitiveCategory);
-    }
-  }
-
-  public static String getRandString(Random r) {
-    return getRandString(r, null, r.nextInt(10));
-  }
-
-  public static String getRandString(Random r, String characters, int length) {
-    if (characters == null) {
-      characters = "ABCDEFGHIJKLMabcdefghijklm";
-    }
-    StringBuilder sb = new StringBuilder();
-    sb.append("");
-    for (int i = 0; i < length; i++) {
-      if (characters == null) {
-        sb.append((char) (r.nextInt(128)));
-      } else {
-        sb.append(characters.charAt(r.nextInt(characters.length())));
-      }
-    }
-    return sb.toString();
-  }
-
-  public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
-    int maxLength = 1 + r.nextInt(charTypeInfo.getLength());
-    String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
-    HiveChar hiveChar = new HiveChar(randomString, maxLength);
-    return hiveChar;
-  }
-
-  public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
-    int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
-    String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
-    HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
-    return hiveVarchar;
-  }
-
-  public static byte[] getRandBinary(Random r, int len){
-    byte[] bytes = new byte[len];
-    for (int j = 0; j < len; j++){
-      bytes[j] = Byte.valueOf((byte) r.nextInt());
-    }
-    return bytes;
-  }
-
-  private static final String DECIMAL_CHARS = "0123456789";
-
-  public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) {
-    while (true) {
-      StringBuilder sb = new StringBuilder();
-      int precision = 1 + r.nextInt(18);
-      int scale = 0 + r.nextInt(precision + 1);
-
-      int integerDigits = precision - scale;
-
-      if (r.nextBoolean()) {
-        sb.append("-");
-      }
-
-      if (integerDigits == 0) {
-        sb.append("0");
-      } else {
-        sb.append(getRandString(r, DECIMAL_CHARS, integerDigits));
-      }
-      if (scale != 0) {
-        sb.append(".");
-        sb.append(getRandString(r, DECIMAL_CHARS, scale));
-      }
-
-      HiveDecimal bd = HiveDecimal.create(sb.toString());
-      if (bd.scale() > bd.precision()) {
-        // Sometimes weird decimals are produced?
-        continue;
-      }
-
-      return bd;
-    }
-  }
-
-  public static Date getRandDate(Random r) {
-    String dateStr = String.format("%d-%02d-%02d",
-        Integer.valueOf(1800 + r.nextInt(500)),  // year
-        Integer.valueOf(1 + r.nextInt(12)),      // month
-        Integer.valueOf(1 + r.nextInt(28)));     // day
-    Date dateVal = Date.valueOf(dateStr);
-    return dateVal;
-  }
-
-  public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) {
-    String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
-    String intervalYearMonthStr = String.format("%s%d-%d",
-        yearMonthSignStr,
-        Integer.valueOf(1800 + r.nextInt(500)),  // year
-        Integer.valueOf(0 + r.nextInt(12)));     // month
-    HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr);
-    TestCase.assertTrue(intervalYearMonthVal != null);
-    return intervalYearMonthVal;
-  }
-
-  public static HiveIntervalDayTime getRandIntervalDayTime(Random r) {
-    String optionalNanos = "";
-    if (r.nextInt(2) == 1) {
-      optionalNanos = String.format(".%09d",
-          Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC)));
-    }
-    String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
-    String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s",
-        yearMonthSignStr,
-        Integer.valueOf(1 + r.nextInt(28)),      // day
-        Integer.valueOf(0 + r.nextInt(24)),      // hour
-        Integer.valueOf(0 + r.nextInt(60)),      // minute
-        Integer.valueOf(0 + r.nextInt(60)),      // second
-        optionalNanos);
-    HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr);
-    TestCase.assertTrue(intervalDayTimeVal != null);
-    return intervalDayTimeVal;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
index 959a2af..c55d951 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
@@ -19,13 +19,10 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
 import java.util.Random;
 
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
 
 import junit.framework.TestCase;
 
@@ -50,7 +47,7 @@ public class TestVectorRowObject extends TestCase {
     }
   }
 
-  void testVectorRowObject(int caseNum, Random r) throws HiveException {
+  void testVectorRowObject(int caseNum, boolean sort, Random r) throws HiveException {
 
     String[] emptyScratchTypeNames = new String[0];
 
@@ -74,6 +71,9 @@ public class TestVectorRowObject extends TestCase {
     vectorExtractRow.init(source.typeNames());
 
     Object[][] randomRows = source.randomRows(100000);
+    if (sort) {
+      source.sort(randomRows);
+    }
     int firstRandomRowIndex = 0;
     for (int i = 0; i < randomRows.length; i++) {
       Object[] row = randomRows[i];
@@ -93,14 +93,22 @@ public class TestVectorRowObject extends TestCase {
 
   public void testVectorRowObject() throws Throwable {
 
-  try {
-    Random r = new Random(5678);
-    for (int c = 0; c < 10; c++) {
-      testVectorRowObject(c, r);
+    try {
+      Random r = new Random(5678);
+
+      int caseNum = 0;
+      for (int i = 0; i < 10; i++) {
+        testVectorRowObject(caseNum, false, r);
+        caseNum++;
+      }
+
+      // Try one sorted.
+      testVectorRowObject(caseNum, true, r);
+      caseNum++;
+
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
     }
-  } catch (Throwable e) {
-    e.printStackTrace();
-    throw e;
-  }
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
index e37d2bf..da69ee3 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
@@ -22,8 +22,6 @@ import java.io.IOException;
 import java.sql.Date;
 import java.sql.Timestamp;
 import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
 import java.util.Properties;
 import java.util.Random;
 
@@ -50,6 +48,7 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
 import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
 import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
+import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
 import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead;
@@ -62,7 +61,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
 import org.apache.hadoop.io.BooleanWritable;
@@ -86,7 +84,7 @@ public class TestVectorSerDeRow extends TestCase {
     LAZY_SIMPLE
   }
 
-  void deserializeAndVerify(Output output, DeserializeRead deserializeRead, 
+  void deserializeAndVerify(Output output, DeserializeRead deserializeRead,
               RandomRowObjectSource source, Object[] expectedRow)
               throws HiveException, IOException {
     deserializeRead.set(output.getData(),  0, output.getLength());
@@ -523,7 +521,7 @@ public class TestVectorSerDeRow extends TestCase {
 
     // Set the configuration parameters
     tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
-    
+
     tbl.setProperty("columns", fieldNames);
     tbl.setProperty("columns.types", fieldTypes);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
new file mode 100644
index 0000000..3a23584
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
@@ -0,0 +1,721 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Random;
+import java.util.TreeMap;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult;
+import org.apache.hadoop.hive.serde2.WriteBuffers;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.WritableComparator;
+
+import com.google.common.base.Preconditions;
+
+import static org.junit.Assert.*;
+
+public class CheckFastHashTable {
+
+  public static boolean findMatch(byte[] valueBytes, List<byte[]> actualValues, int actualCount, boolean[] taken) {
+    for (int i = 0; i < actualCount; i++) {
+      if (!taken[i]) {
+        byte[] actualBytes = actualValues.get(i);
+        if (StringExpr.compare(valueBytes, 0, valueBytes.length, actualBytes, 0, actualBytes.length) == 0) {
+          taken[i] = true;
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  public static void verifyHashMapValues(VectorMapJoinHashMapResult hashMapResult,
+      List<byte[]> values) {
+
+    int valueCount = values.size();
+
+    WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
+
+    // Read through all values.
+    List<byte[]> actualValues = new ArrayList<byte[]>();
+    while (true) {
+      byte[] bytes = ref.getBytes();
+      int offset = (int) ref.getOffset();
+      int length = ref.getLength();
+
+      if (length == 0) {
+        actualValues.add(new byte[0]);
+      } else {
+        actualValues.add(Arrays.copyOfRange(bytes, offset, offset + length));
+      }
+      ref = hashMapResult.next();
+      if (ref == null) {
+        break;
+      }
+    }
+
+    int actualCount = actualValues.size();
+
+    if (valueCount != actualCount) {
+      TestCase.fail("values.size() " + valueCount + " does not match actualCount " + actualCount);
+    }
+
+    boolean[] taken = new boolean[actualCount];
+
+    for (int i = 0; i < actualCount; i++) {
+      byte[] valueBytes = values.get(i);
+
+      if (!findMatch(valueBytes, actualValues, actualCount, taken)) {
+        List<Integer> availableLengths = new ArrayList<Integer>();
+        for (int a = 0; a < actualCount; a++) {
+          if (!taken[a]) {
+            availableLengths.add(actualValues.get(a).length);
+          }
+        }
+        TestCase.fail("No match for actual value (valueBytes length " + valueBytes.length +
+            ", availableLengths " + availableLengths.toString() + " of " + actualCount + " total)");
+      }
+    }
+  }
+
+  /*
+   * Element for Key: Long x Hash Table: HashMap
+   */
+  public static class FastLongHashMapElement {
+    private long key;
+    private List<byte[]> values;
+
+    public FastLongHashMapElement(long key, byte[] firstValue) {
+      this.key = key;
+      values = new ArrayList<byte[]>();
+      values.add(firstValue);
+    }
+
+    public long getKey() {
+      return key;
+    }
+
+    public int getValueCount() {
+      return values.size();
+    }
+
+    public List<byte[]> getValues() {
+      return values;
+    }
+
+    public void addValue(byte[] value) {
+      values.add(value);
+    }
+  }
+
+  /*
+   * Verify table for Key: Long x Hash Table: HashMap
+   */
+  public static class VerifyFastLongHashMap {
+
+    private int count;
+
+    private FastLongHashMapElement[] array;
+
+    private HashMap<Long, Integer> keyValueMap;
+
+    public VerifyFastLongHashMap() {
+      count = 0;
+      array = new FastLongHashMapElement[50];
+      keyValueMap = new HashMap<Long, Integer>();
+    }
+
+    public int getCount() {
+      return count;
+    }
+
+    public boolean contains(long key) {
+      return keyValueMap.containsKey(key);
+    }
+
+    public void add(long key, byte[] value) {
+      if (keyValueMap.containsKey(key)) {
+        int index = keyValueMap.get(key);
+        array[index].addValue(value);
+      } else {
+        if (count >= array.length) {
+          // Grow.
+          FastLongHashMapElement[] newArray = new FastLongHashMapElement[array.length * 2];
+          System.arraycopy(array, 0, newArray, 0, count);
+          array = newArray;
+        }
+        array[count] = new FastLongHashMapElement(key, value);
+        keyValueMap.put(key, count);
+        count++;
+      }
+    }
+
+    public long addRandomExisting(byte[] value, Random r) {
+      Preconditions.checkState(count > 0);
+      int index = r.nextInt(count);
+      array[index].addValue(value);
+      return array[index].getKey();
+    }
+
+    public long getKey(int index) {
+      return array[index].getKey();
+    }
+
+    public List<byte[]> getValues(int index) {
+      return array[index].getValues();
+    }
+
+    public void verify(VectorMapJoinFastLongHashMap map) {
+      int mapSize = map.size();
+      if (mapSize != count) {
+        TestCase.fail("map.size() does not match expected count");
+      }
+
+      for (int index = 0; index < count; index++) {
+        FastLongHashMapElement element = array[index];
+        long key = element.getKey();
+        List<byte[]> values = element.getValues();
+
+        VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
+        JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult);
+        if (joinResult != JoinUtil.JoinResult.MATCH) {
+          assertTrue(false);
+        }
+
+        verifyHashMapValues(hashMapResult, values);
+      }
+    }
+  }
+
+  /*
+   * Element for Key: byte[] x Hash Table: HashMap
+   */
+  public static class FastBytesHashMapElement {
+    private byte[] key;
+    private List<byte[]> values;
+
+    public FastBytesHashMapElement(byte[] key, byte[] firstValue) {
+      this.key = key;
+      values = new ArrayList<byte[]>();
+      values.add(firstValue);
+    }
+
+    public byte[] getKey() {
+      return key;
+    }
+
+    public int getValueCount() {
+      return values.size();
+    }
+
+    public List<byte[]> getValues() {
+      return values;
+    }
+
+    public void addValue(byte[] value) {
+      values.add(value);
+    }
+  }
+
+  /*
+   * Verify table for Key: byte[] x Hash Table: HashMap
+   */
+  public static class VerifyFastBytesHashMap {
+
+    private int count;
+
+    private FastBytesHashMapElement[] array;
+
+    private TreeMap<BytesWritable, Integer> keyValueMap;
+
+    public VerifyFastBytesHashMap() {
+      count = 0;
+      array = new FastBytesHashMapElement[50];
+
+      // We use BytesWritable because it supports Comparable for our TreeMap.
+      keyValueMap = new TreeMap<BytesWritable, Integer>();
+    }
+
+    public int getCount() {
+      return count;
+    }
+
+    public boolean contains(byte[] key) {
+      BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+      return keyValueMap.containsKey(keyBytesWritable);
+    }
+
+    public void add(byte[] key, byte[] value) {
+      BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+      if (keyValueMap.containsKey(keyBytesWritable)) {
+        int index = keyValueMap.get(keyBytesWritable);
+        array[index].addValue(value);
+      } else {
+        if (count >= array.length) {
+          // Grow.
+          FastBytesHashMapElement[] newArray = new FastBytesHashMapElement[array.length * 2];
+          System.arraycopy(array, 0, newArray, 0, count);
+          array = newArray;
+        }
+        array[count] = new FastBytesHashMapElement(key, value);
+        keyValueMap.put(keyBytesWritable, count);
+        count++;
+      }
+    }
+
+    public byte[] addRandomExisting(byte[] value, Random r) {
+      Preconditions.checkState(count > 0);
+      int index = r.nextInt(count);
+      array[index].addValue(value);
+      return array[index].getKey();
+    }
+
+    public byte[] getKey(int index) {
+      return array[index].getKey();
+    }
+
+    public List<byte[]> getValues(int index) {
+      return array[index].getValues();
+    }
+
+    public void verify(VectorMapJoinFastBytesHashMap map) {
+      int mapSize = map.size();
+      if (mapSize != count) {
+        TestCase.fail("map.size() does not match expected count");
+      }
+
+      for (int index = 0; index < count; index++) {
+        FastBytesHashMapElement element = array[index];
+        byte[] key = element.getKey();
+        List<byte[]> values = element.getValues();
+
+        VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
+        JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult);
+        if (joinResult != JoinUtil.JoinResult.MATCH) {
+          assertTrue(false);
+        }
+
+        verifyHashMapValues(hashMapResult, values);
+      }
+    }
+  }
+
+  /*
+   * Element for Key: Long x Hash Table: HashMultiSet
+   */
+  public static class FastLongHashMultiSetElement {
+    private long key;
+    private int multiSetCount;
+
+    public FastLongHashMultiSetElement(long key) {
+      this.key = key;
+      multiSetCount = 1;
+    }
+
+    public long getKey() {
+      return key;
+    }
+
+    public int getMultiSetCount() {
+      return multiSetCount;
+    }
+
+    public void incrementMultiSetCount() {
+      multiSetCount++;
+    }
+  }
+
+  /*
+   * Verify table for Key: Long x Hash Table: HashMultiSet
+   */
+  public static class VerifyFastLongHashMultiSet {
+
+    private int count;
+
+    private FastLongHashMultiSetElement[] array;
+
+    private HashMap<Long, Integer> keyValueMap;
+
+    public VerifyFastLongHashMultiSet() {
+      count = 0;
+      array = new FastLongHashMultiSetElement[50];
+      keyValueMap = new HashMap<Long, Integer>();
+    }
+
+    public int getCount() {
+      return count;
+    }
+
+    public boolean contains(long key) {
+      return keyValueMap.containsKey(key);
+    }
+
+    public void add(long key) {
+      if (keyValueMap.containsKey(key)) {
+        int index = keyValueMap.get(key);
+        array[index].incrementMultiSetCount();
+      } else {
+        if (count >= array.length) {
+          // Grow.
+          FastLongHashMultiSetElement[] newArray = new FastLongHashMultiSetElement[array.length * 2];
+          System.arraycopy(array, 0, newArray, 0, count);
+          array = newArray;
+        }
+        array[count] = new FastLongHashMultiSetElement(key);
+        keyValueMap.put(key, count);
+        count++;
+      }
+    }
+
+    public long addRandomExisting(byte[] value, Random r) {
+      Preconditions.checkState(count > 0);
+      int index = r.nextInt(count);
+      array[index].incrementMultiSetCount();
+      return array[index].getKey();
+    }
+
+    public long getKey(int index) {
+      return array[index].getKey();
+    }
+
+    public int getMultiSetCount(int index) {
+      return array[index].getMultiSetCount();
+    }
+
+    public void verify(VectorMapJoinFastLongHashMultiSet map) {
+      int mapSize = map.size();
+      if (mapSize != count) {
+        TestCase.fail("map.size() does not match expected count");
+      }
+
+      for (int index = 0; index < count; index++) {
+        FastLongHashMultiSetElement element = array[index];
+        long key = element.getKey();
+        int multiSetCount = element.getMultiSetCount();
+
+        VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult();
+        JoinUtil.JoinResult joinResult = map.contains(key, hashMultiSetResult);
+        if (joinResult != JoinUtil.JoinResult.MATCH) {
+          assertTrue(false);
+        }
+
+        assertEquals(hashMultiSetResult.count(), multiSetCount);
+      }
+    }
+  }
+
+  /*
+   * Element for Key: byte[] x Hash Table: HashMultiSet
+   */
+  public static class FastBytesHashMultiSetElement {
+    private byte[] key;
+    private int multiSetCount;
+
+    public FastBytesHashMultiSetElement(byte[] key) {
+      this.key = key;
+      multiSetCount = 1;
+    }
+
+    public byte[] getKey() {
+      return key;
+    }
+
+    public int getMultiSetCount() {
+      return multiSetCount;
+    }
+
+    public void incrementMultiSetCount() {
+      multiSetCount++;
+    }
+  }
+
+  /*
+   * Verify table for Key: byte[] x Hash Table: HashMultiSet
+   */
+  public static class VerifyFastBytesHashMultiSet {
+
+    private int count;
+
+    private FastBytesHashMultiSetElement[] array;
+
+    private TreeMap<BytesWritable, Integer> keyValueMap;
+
+    public VerifyFastBytesHashMultiSet() {
+      count = 0;
+      array = new FastBytesHashMultiSetElement[50];
+
+      // We use BytesWritable because it supports Comparable for our TreeMap.
+      keyValueMap = new TreeMap<BytesWritable, Integer>();
+    }
+
+    public int getCount() {
+      return count;
+    }
+
+    public boolean contains(byte[] key) {
+      BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+      return keyValueMap.containsKey(keyBytesWritable);
+    }
+
+    public void add(byte[] key) {
+      BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+      if (keyValueMap.containsKey(keyBytesWritable)) {
+        int index = keyValueMap.get(keyBytesWritable);
+        array[index].incrementMultiSetCount();
+      } else {
+        if (count >= array.length) {
+          // Grow.
+          FastBytesHashMultiSetElement[] newArray = new FastBytesHashMultiSetElement[array.length * 2];
+          System.arraycopy(array, 0, newArray, 0, count);
+          array = newArray;
+        }
+        array[count] = new FastBytesHashMultiSetElement(key);
+        keyValueMap.put(keyBytesWritable, count);
+        count++;
+      }
+    }
+
+    public byte[] addRandomExisting(byte[] value, Random r) {
+      Preconditions.checkState(count > 0);
+      int index = r.nextInt(count);
+      array[index].incrementMultiSetCount();
+      return array[index].getKey();
+    }
+
+    public byte[] getKey(int index) {
+      return array[index].getKey();
+    }
+
+    public int getMultiSetCount(int index) {
+      return array[index].getMultiSetCount();
+    }
+
+    public void verify(VectorMapJoinFastBytesHashMultiSet map) {
+      int mapSize = map.size();
+      if (mapSize != count) {
+        TestCase.fail("map.size() does not match expected count");
+      }
+
+      for (int index = 0; index < count; index++) {
+        FastBytesHashMultiSetElement element = array[index];
+        byte[] key = element.getKey();
+        int multiSetCount = element.getMultiSetCount();
+
+        VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult();
+        JoinUtil.JoinResult joinResult = map.contains(key, 0, key.length, hashMultiSetResult);
+        if (joinResult != JoinUtil.JoinResult.MATCH) {
+          assertTrue(false);
+        }
+
+        assertEquals(hashMultiSetResult.count(), multiSetCount);
+      }
+    }
+  }
+
+  /*
+   * Element for Key: Long x Hash Table: HashSet
+   */
+  public static class FastLongHashSetElement {
+    private long key;
+
+    public FastLongHashSetElement(long key) {
+      this.key = key;
+    }
+
+    public long getKey() {
+      return key;
+    }
+  }
+
+  /*
+   * Verify table for Key: Long x Hash Table: HashSet
+   */
+  public static class VerifyFastLongHashSet {
+
+    private int count;
+
+    private FastLongHashSetElement[] array;
+
+    private HashMap<Long, Integer> keyValueMap;
+
+    public VerifyFastLongHashSet() {
+      count = 0;
+      array = new FastLongHashSetElement[50];
+      keyValueMap = new HashMap<Long, Integer>();
+    }
+
+    public int getCount() {
+      return count;
+    }
+
+    public boolean contains(long key) {
+      return keyValueMap.containsKey(key);
+    }
+
+    public void add(long key) {
+      if (keyValueMap.containsKey(key)) {
+        // Already exists.
+      } else {
+        if (count >= array.length) {
+          // Grow.
+          FastLongHashSetElement[] newArray = new FastLongHashSetElement[array.length * 2];
+          System.arraycopy(array, 0, newArray, 0, count);
+          array = newArray;
+        }
+        array[count] = new FastLongHashSetElement(key);
+        keyValueMap.put(key, count);
+        count++;
+      }
+    }
+
+    public long addRandomExisting(byte[] value, Random r) {
+      Preconditions.checkState(count > 0);
+      int index = r.nextInt(count);
+
+      // Exists already.
+
+      return array[index].getKey();
+    }
+
+    public long getKey(int index) {
+      return array[index].getKey();
+    }
+
+    public void verify(VectorMapJoinFastLongHashSet map) {
+      int mapSize = map.size();
+      if (mapSize != count) {
+        TestCase.fail("map.size() does not match expected count");
+      }
+
+      for (int index = 0; index < count; index++) {
+        FastLongHashSetElement element = array[index];
+        long key = element.getKey();
+
+        VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult();
+        JoinUtil.JoinResult joinResult = map.contains(key, hashSetResult);
+        if (joinResult != JoinUtil.JoinResult.MATCH) {
+          assertTrue(false);
+        }
+      }
+    }
+  }
+
+  /*
+   * Element for Key: byte[] x Hash Table: HashSet
+   */
+  public static class FastBytesHashSetElement {
+    private byte[] key;
+
+    public FastBytesHashSetElement(byte[] key) {
+      this.key = key;
+    }
+
+    public byte[] getKey() {
+      return key;
+    }
+  }
+
+  /*
+   * Verify table for Key: byte[] x Hash Table: HashSet
+   */
+  public static class VerifyFastBytesHashSet {
+
+    private int count;
+
+    private FastBytesHashSetElement[] array;
+
+    private TreeMap<BytesWritable, Integer> keyValueMap;
+
+    public VerifyFastBytesHashSet() {
+      count = 0;
+      array = new FastBytesHashSetElement[50];
+
+      // We use BytesWritable because it supports Comparable for our TreeMap.
+      keyValueMap = new TreeMap<BytesWritable, Integer>();
+    }
+
+    public int getCount() {
+      return count;
+    }
+
+    public boolean contains(byte[] key) {
+      BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+      return keyValueMap.containsKey(keyBytesWritable);
+    }
+
+    public void add(byte[] key) {
+      BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+      if (keyValueMap.containsKey(keyBytesWritable)) {
+        // Already exists.
+      } else {
+        if (count >= array.length) {
+          // Grow.
+          FastBytesHashSetElement[] newArray = new FastBytesHashSetElement[array.length * 2];
+          System.arraycopy(array, 0, newArray, 0, count);
+          array = newArray;
+        }
+        array[count] = new FastBytesHashSetElement(key);
+        keyValueMap.put(keyBytesWritable, count);
+        count++;
+      }
+    }
+
+    public byte[] addRandomExisting(byte[] value, Random r) {
+      Preconditions.checkState(count > 0);
+      int index = r.nextInt(count);
+
+      // Already exists.
+
+      return array[index].getKey();
+    }
+
+    public byte[] getKey(int index) {
+      return array[index].getKey();
+    }
+
+    public void verify(VectorMapJoinFastBytesHashSet map) {
+      int mapSize = map.size();
+      if (mapSize != count) {
+        TestCase.fail("map.size() does not match expected count");
+      }
+
+      for (int index = 0; index < count; index++) {
+        FastBytesHashSetElement element = array[index];
+        byte[] key = element.getKey();
+
+        VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult();
+        JoinUtil.JoinResult joinResult = map.contains(key, 0, key.length, hashSetResult);
+        if (joinResult != JoinUtil.JoinResult.MATCH) {
+          assertTrue(false);
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
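
For context, the Verify* classes above are meant to shadow a fast hash table during a test: every insert is recorded in both the table under test and the verify structure, and verify() then replays each recorded key through the table's lookup/contains API and compares the stored values. A minimal sketch of that pattern for the long-key hash map follows; it assumes "map" is the VectorMapJoinFastLongHashMap under test (its construction and put call are defined outside this hunk) and uses the MAX_VALUE_LENGTH bound added to CommonFastHashTable below.

    // Shadow-table pattern (sketch): keep a VerifyFastLongHashMap in lockstep
    // with the VectorMapJoinFastLongHashMap under test, then cross-check.
    Random random = new Random(9300);
    VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap();
    for (int i = 0; i < 1000; i++) {
      long key = random.nextLong();
      byte[] value = new byte[1 + random.nextInt(MAX_VALUE_LENGTH)];
      random.nextBytes(value);
      verifyTable.add(key, value);   // record the expected key -> value pairing
      // ... put the same key/value into "map" here (put API not shown in this hunk) ...
    }
    verifyTable.verify(map);         // replays map.lookup() for every recorded key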

http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java
index c2375e0..90e8f33 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java
@@ -18,16 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
 
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
 import java.util.Random;
 
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
-import org.apache.hadoop.hive.serde2.WriteBuffers;
-
-import static org.junit.Assert.*;
-
 public class CommonFastHashTable {
 
   protected static final float LOAD_FACTOR = 0.75f;
@@ -39,6 +31,10 @@ public class CommonFastHashTable {
   protected static final int LARGE_CAPACITY = 8388608;
   protected static Random random;
 
+  protected static int MAX_KEY_LENGTH = 100;
+
+  protected static int MAX_VALUE_LENGTH = 1000;
+
   public static int generateLargeCount() {
     int count = 0;
     if (random.nextInt(100) != 0) {
@@ -75,54 +71,4 @@ public class CommonFastHashTable {
     }
     return count;
   }
-  public static void verifyHashMapResult(VectorMapJoinHashMapResult hashMapResult,
-      RandomByteArrayStream randomByteArrayStream ) {
-
-    List<byte[]> resultBytes = new ArrayList<byte[]>();
-    int count = 0;
-    if (hashMapResult.hasRows()) {
-      WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
-      while (ref != null) {
-        count++;
-        byte[] bytes = ref.getBytes();
-        int offset = (int) ref.getOffset();
-        int length = ref.getLength();
-        resultBytes.add(Arrays.copyOfRange(bytes, offset, offset + length));
-        ref = hashMapResult.next();
-      }
-    } else {
-      assertTrue(hashMapResult.isEof());
-    }
-    if (randomByteArrayStream.size() != count) {
-      assertTrue(false);
-    }
-
-    for (int i = 0; i < count; ++i) {
-      byte[] bytes = resultBytes.get(i);
-      if (!randomByteArrayStream.contains(bytes)) {
-        assertTrue(false);
-      }
-    }
-  }
-
-  public static void verifyHashMapResult(VectorMapJoinHashMapResult hashMapResult,
-      byte[] valueBytes ) {
-
-    assertTrue(hashMapResult.hasRows());
-    WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
-    byte[] bytes = ref.getBytes();
-    int offset = (int) ref.getOffset();
-    int length = ref.getLength();
-    assertTrue(valueBytes.length == length);
-    boolean match = true;  // Assume
-    for (int j = 0; j < length; j++) {
-      if (valueBytes[j] != bytes[offset + j]) {
-        match = false;
-        break;
-      }
-    }
-    if (!match) {
-      assertTrue(false);
-    }
-  }
 }
\ No newline at end of file
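
The two bounds added above (MAX_KEY_LENGTH, MAX_VALUE_LENGTH) cap the sizes of the random byte-array keys and values the new tests generate. A minimal sketch of how such keys and values can be built, assuming java.util.Random.nextBytes fills the arrays:

    byte[] key = new byte[1 + random.nextInt(MAX_KEY_LENGTH)];      // 1..MAX_KEY_LENGTH bytes
    random.nextBytes(key);

    byte[] value = new byte[1 + random.nextInt(MAX_VALUE_LENGTH)];  // 1..MAX_VALUE_LENGTH bytes
    random.nextBytes(value);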