You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/05/15 03:51:44 UTC
[3/3] hive git commit: HIVE-13682: EOFException with fast hashtable
(Matt McCline, reviewed by Sergey Shelukhin)
HIVE-13682: EOFException with fast hashtable (Matt McCline, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4533d21b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4533d21b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4533d21b
Branch: refs/heads/master
Commit: 4533d21b0be487e1f11fcc95578a2ba103e72a64
Parents: fbeee62
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sat May 14 20:44:27 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sat May 14 20:44:27 2016 -0700
----------------------------------------------------------------------
.../fast/VectorMapJoinFastBytesHashMap.java | 12 +-
.../VectorMapJoinFastBytesHashMultiSet.java | 10 +-
.../fast/VectorMapJoinFastBytesHashSet.java | 5 +-
.../fast/VectorMapJoinFastBytesHashTable.java | 14 +-
.../fast/VectorMapJoinFastLongHashMap.java | 22 +-
.../fast/VectorMapJoinFastLongHashMultiSet.java | 13 +-
.../fast/VectorMapJoinFastLongHashSet.java | 16 +-
.../fast/VectorMapJoinFastLongHashTable.java | 13 -
.../fast/VectorMapJoinFastMultiKeyHashMap.java | 21 +-
.../VectorMapJoinFastMultiKeyHashMultiSet.java | 25 +-
.../fast/VectorMapJoinFastMultiKeyHashSet.java | 26 +-
.../fast/VectorMapJoinFastStringHashMap.java | 4 +-
.../VectorMapJoinFastStringHashMultiSet.java | 4 +-
.../fast/VectorMapJoinFastStringHashSet.java | 4 +-
.../fast/VectorMapJoinFastValueStore.java | 187 ++++-
.../ql/exec/vector/RandomRowObjectSource.java | 388 ----------
.../ql/exec/vector/TestVectorRowObject.java | 34 +-
.../hive/ql/exec/vector/TestVectorSerDeRow.java | 8 +-
.../vector/mapjoin/fast/CheckFastHashTable.java | 721 +++++++++++++++++++
.../mapjoin/fast/CommonFastHashTable.java | 62 +-
.../fast/TestVectorMapJoinFastBytesHashMap.java | 272 +++++++
.../TestVectorMapJoinFastBytesHashMultiSet.java | 253 +++++++
.../fast/TestVectorMapJoinFastBytesHashSet.java | 252 +++++++
.../fast/TestVectorMapJoinFastLongHashMap.java | 303 ++++----
.../TestVectorMapJoinFastLongHashMultiSet.java | 252 +++++++
.../fast/TestVectorMapJoinFastLongHashSet.java | 250 +++++++
.../TestVectorMapJoinFastMultiKeyHashMap.java | 231 ------
.../hive/serde2/fast/RandomRowObjectSource.java | 423 +++++++++++
.../fast/LazyBinaryDeserializeRead.java | 2 +-
.../apache/hadoop/hive/serde2/VerifyFast.java | 123 ++--
.../hive/serde2/binarysortable/MyTestClass.java | 86 +++
.../binarysortable/TestBinarySortableFast.java | 384 +++++++---
.../hive/serde2/lazy/TestLazySimpleFast.java | 270 ++++---
.../serde2/lazybinary/TestLazyBinaryFast.java | 285 ++++++--
.../hadoop/hive/common/type/RandomTypeUtil.java | 95 +++
35 files changed, 3852 insertions(+), 1218 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
index 0ff98bd..a4bc188 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
@@ -18,16 +18,23 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.HashCodeUtil;
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An single byte array value hash map optimized for vector map join.
+ * A bytes key hash map optimized for vector map join.
+ *
+ * This is the abstract base for the multi-key and string bytes key hash map implementations.
*/
public abstract class VectorMapJoinFastBytesHashMap
extends VectorMapJoinFastBytesHashTable
@@ -37,6 +44,8 @@ public abstract class VectorMapJoinFastBytesHashMap
private VectorMapJoinFastValueStore valueStore;
+ protected BytesWritable testValueBytesWritable;
+
@Override
public VectorMapJoinHashMapResult createHashMapResult() {
return new VectorMapJoinFastValueStore.HashMapResult();
@@ -56,7 +65,6 @@ public abstract class VectorMapJoinFastBytesHashMap
slotTriples[tripleIndex + 1] = hashCode;
slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength);
// LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
- keysAssigned++;
} else {
// Add another value.
// LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
index 5d8ed2d..aaf3497 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
@@ -18,16 +18,23 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.HashCodeUtil;
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An single byte array value hash multi-set optimized for vector map join.
+ * A bytes key hash multi-set optimized for vector map join.
+ *
+ * This is the abstract base for the multi-key and string bytes key hash multi-set implementations.
*/
public abstract class VectorMapJoinFastBytesHashMultiSet
extends VectorMapJoinFastBytesHashTable
@@ -51,7 +58,6 @@ public abstract class VectorMapJoinFastBytesHashMultiSet
slotTriples[tripleIndex + 1] = hashCode;
slotTriples[tripleIndex + 2] = 1; // Count.
// LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
- keysAssigned++;
} else {
// Add another value.
// LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
index 990a2e5..841183e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
@@ -27,7 +27,9 @@ import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.HashCodeUtil;
/*
- * An single byte array value hash multi-set optimized for vector map join.
+ * A bytes key hash set optimized for vector map join.
+ *
+ * This is the abstract base for the multi-key and string bytes key hash set implementations.
*/
public abstract class VectorMapJoinFastBytesHashSet
extends VectorMapJoinFastBytesHashTable
@@ -50,7 +52,6 @@ public abstract class VectorMapJoinFastBytesHashSet
slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength);
slotTriples[tripleIndex + 1] = hashCode;
slotTriples[tripleIndex + 2] = 1; // Existence
- keysAssigned++;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
index 6b536f0..d6e107b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
@@ -42,8 +42,7 @@ public abstract class VectorMapJoinFastBytesHashTable
protected VectorMapJoinFastKeyStore keyStore;
- private BytesWritable testKeyBytesWritable;
- private BytesWritable testValueBytesWritable;
+ protected BytesWritable testKeyBytesWritable;
@Override
public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException {
@@ -53,17 +52,6 @@ public abstract class VectorMapJoinFastBytesHashTable
add(keyBytes, 0, keyLength, currentValue);
}
- @VisibleForTesting
- public void putRow(byte[] currentKey, byte[] currentValue) throws HiveException, IOException {
- if (testKeyBytesWritable == null) {
- testKeyBytesWritable = new BytesWritable();
- testValueBytesWritable = new BytesWritable();
- }
- testKeyBytesWritable.set(currentKey, 0, currentKey.length);
- testValueBytesWritable.set(currentValue, 0, currentValue.length);
- putRow(testKeyBytesWritable, testValueBytesWritable);
- }
-
protected abstract void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength,
long hashCode, boolean isNewKey, BytesWritable currentValue);
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
index 1384fc9..cd51d0d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
@@ -18,17 +18,22 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.HashCodeUtil;
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An single long value map optimized for vector map join.
+ * A single LONG key hash map optimized for vector map join.
*/
public class VectorMapJoinFastLongHashMap
extends VectorMapJoinFastLongHashTable
@@ -38,11 +43,26 @@ public class VectorMapJoinFastLongHashMap
protected VectorMapJoinFastValueStore valueStore;
+ private BytesWritable testValueBytesWritable;
+
@Override
public VectorMapJoinHashMapResult createHashMapResult() {
return new VectorMapJoinFastValueStore.HashMapResult();
}
+ /*
+ * A Unit Test convenience method for putting key and value into the hash table using the
+ * actual types.
+ */
+ @VisibleForTesting
+ public void testPutRow(long currentKey, byte[] currentValue) throws HiveException, IOException {
+ if (testValueBytesWritable == null) {
+ testValueBytesWritable = new BytesWritable();
+ }
+ testValueBytesWritable.set(currentValue, 0, currentValue.length);
+ add(currentKey, testValueBytesWritable);
+ }
+
@Override
public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
index 94bf706..032233a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
@@ -31,8 +31,10 @@ import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.HashCodeUtil;
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An single long value multi-set optimized for vector map join.
+ * A single LONG key hash multi-set optimized for vector map join.
*/
public class VectorMapJoinFastLongHashMultiSet
extends VectorMapJoinFastLongHashTable
@@ -45,6 +47,15 @@ public class VectorMapJoinFastLongHashMultiSet
return new VectorMapJoinFastHashMultiSet.HashMultiSetResult();
}
+ /*
+ * A Unit Test convenience method for putting the key into the hash table using the
+ * actual type.
+ */
+ @VisibleForTesting
+ public void testPutRow(long currentKey) throws HiveException, IOException {
+ add(currentKey, null);
+ }
+
@Override
public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
index 2cbc548..21701d4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
@@ -18,18 +18,23 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashSet;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.common.util.HashCodeUtil;
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An single long value multi-set optimized for vector map join.
+ * A single LONG key hash set optimized for vector map join.
*/
public class VectorMapJoinFastLongHashSet
extends VectorMapJoinFastLongHashTable
@@ -42,6 +47,15 @@ public class VectorMapJoinFastLongHashSet
return new VectorMapJoinFastHashSet.HashSetResult();
}
+ /*
+ * A Unit Test convenience method for putting the key into the hash table using the
+ * actual type.
+ */
+ @VisibleForTesting
+ public void testPutRow(long currentKey) throws HiveException, IOException {
+ add(currentKey, null);
+ }
+
@Override
public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index f37f056..0a502e0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -58,8 +58,6 @@ public abstract class VectorMapJoinFastLongHashTable
private long min;
private long max;
- private BytesWritable testValueBytesWritable;
-
@Override
public boolean useMinMax() {
return useMinMax;
@@ -90,17 +88,6 @@ public abstract class VectorMapJoinFastLongHashTable
add(key, currentValue);
}
-
- @VisibleForTesting
- public void putRow(long currentKey, byte[] currentValue) throws HiveException, IOException {
- if (testValueBytesWritable == null) {
- testValueBytesWritable = new BytesWritable();
- }
- testValueBytesWritable.set(currentValue, 0, currentValue.length);
- add(currentKey, testValueBytesWritable);
- }
-
-
protected abstract void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue);
public void add(long key, BytesWritable currentValue) {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
index 9a9fb8d..cee3b3b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
@@ -18,17 +18,34 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BytesWritable;
+
import com.google.common.annotations.VisibleForTesting;
/*
* An multi-key value hash map optimized for vector map join.
+ *
+ * The key is stored as the provided bytes (uninterpreted).
*/
public class VectorMapJoinFastMultiKeyHashMap
extends VectorMapJoinFastBytesHashMap {
+ /*
+ * A Unit Test convenience method for putting key and value into the hash table using the
+ * actual types.
+ */
@VisibleForTesting
- public VectorMapJoinFastMultiKeyHashMap(int initialCapacity, float loadFactor, int wbSize) {
- this(false, initialCapacity, loadFactor, wbSize);
+ public void testPutRow(byte[] currentKey, byte[] currentValue) throws HiveException, IOException {
+ if (testKeyBytesWritable == null) {
+ testKeyBytesWritable = new BytesWritable();
+ testValueBytesWritable = new BytesWritable();
+ }
+ testKeyBytesWritable.set(currentKey, 0, currentKey.length);
+ testValueBytesWritable.set(currentValue, 0, currentValue.length);
+ putRow(testKeyBytesWritable, testValueBytesWritable);
}
public VectorMapJoinFastMultiKeyHashMap(
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
index a8744a5..ff82ac4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
@@ -18,15 +18,38 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An multi-key value hash multi-set optimized for vector map join.
+ * A multi-key hash multi-set optimized for vector map join.
+ *
+ * The key is stored as the provided bytes (uninterpreted).
*/
public class VectorMapJoinFastMultiKeyHashMultiSet
extends VectorMapJoinFastBytesHashMultiSet {
+ /*
+ * A Unit Test convenience method for putting the key into the hash table using the
+ * actual type.
+ */
+ @VisibleForTesting
+ public void testPutRow(byte[] currentKey) throws HiveException, IOException {
+ if (testKeyBytesWritable == null) {
+ testKeyBytesWritable = new BytesWritable();
+ }
+ testKeyBytesWritable.set(currentKey, 0, currentKey.length);
+ putRow(testKeyBytesWritable, null);
+ }
+
public VectorMapJoinFastMultiKeyHashMultiSet(
boolean isOuterJoin,
int initialCapacity, float loadFactor, int writeBuffersSize) {
super(initialCapacity, loadFactor, writeBuffersSize);
}
+
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
index a8048e5..de0666d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
@@ -18,15 +18,39 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.google.common.annotations.VisibleForTesting;
+
/*
- * An multi-key value hash set optimized for vector map join.
+ * A multi-key hash set optimized for vector map join.
+ *
+ * The key is stored as the provided bytes (uninterpreted).
*/
public class VectorMapJoinFastMultiKeyHashSet
extends VectorMapJoinFastBytesHashSet {
+ /*
+ * A Unit Test convenience method for putting the key into the hash table using the
+ * actual type.
+ */
+ @VisibleForTesting
+ public void testPutRow(byte[] currentKey) throws HiveException, IOException {
+ if (testKeyBytesWritable == null) {
+ testKeyBytesWritable = new BytesWritable();
+ }
+ testKeyBytesWritable.set(currentKey, 0, currentKey.length);
+ putRow(testKeyBytesWritable, null);
+ }
+
public VectorMapJoinFastMultiKeyHashSet(
boolean isOuterJoin,
int initialCapacity, float loadFactor, int writeBuffersSize) {
super(initialCapacity, loadFactor, writeBuffersSize);
}
+
+
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
index 6f181b2..35af1d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
@@ -24,7 +24,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.BytesWritable;
/*
- * An single byte array value hash map optimized for vector map join.
+ * A single STRING key hash map optimized for vector map join.
+ *
+ * The key will be deserialized and just the bytes will be stored.
*/
public class VectorMapJoinFastStringHashMap extends VectorMapJoinFastBytesHashMap {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
index 9653b71..36120b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
@@ -24,7 +24,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.BytesWritable;
/*
- * An single byte array value hash map optimized for vector map join.
+ * A single STRING key hash multi-set optimized for vector map join.
+ *
+ * The key will be deserialized and just the bytes will be stored.
*/
public class VectorMapJoinFastStringHashMultiSet extends VectorMapJoinFastBytesHashMultiSet {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
index 6419a0b..2ed6ab3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
@@ -24,7 +24,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.BytesWritable;
/*
- * An single byte array value hash map optimized for vector map join.
+ * A single STRING key hash set optimized for vector map join.
+ *
+ * The key will be deserialized and just the bytes will be stored.
*/
public class VectorMapJoinFastStringHashSet extends VectorMapJoinFastBytesHashSet {
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
index 570a747..f96e32b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
@@ -23,7 +23,9 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
import org.apache.hadoop.hive.serde2.WriteBuffers;
import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef;
-import org.apache.hadoop.hive.serde2.WriteBuffers.Position;;
+import org.apache.hadoop.hive.serde2.WriteBuffers.Position;
+
+import com.google.common.base.Preconditions;
// Supports random access.
@@ -142,7 +144,6 @@ public class VectorMapJoinFastValueStore {
}
public void set(VectorMapJoinFastValueStore valueStore, long valueRefWord) {
- // LOG.debug("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord));
this.valueStore = valueStore;
this.valueRefWord = valueRefWord;
@@ -217,6 +218,10 @@ public class VectorMapJoinFastValueStore {
if (readIndex == 0) {
/*
+ * Positioned to first.
+ */
+
+ /*
* Extract information from reference word from slot table.
*/
absoluteValueOffset =
@@ -226,19 +231,32 @@ public class VectorMapJoinFastValueStore {
valueStore.writeBuffers.setReadPoint(absoluteValueOffset, readPos);
if (isSingleRow) {
+ /*
+ * One element.
+ */
isNextEof = true;
valueLength =
(int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift);
boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn);
if (!isValueLengthSmall) {
- // And, if current value is big we must read it.
+
+ // {Big Value Len} {Big Value Bytes}
valueLength = valueStore.writeBuffers.readVInt(readPos);
+ } else {
+
+ // {Small Value Bytes}
+ // (use small length from valueRefWord)
}
} else {
+ /*
+ * First of Multiple elements.
+ */
isNextEof = false;
- // 2nd and beyond records have a relative offset word at the beginning.
+ /*
+ * Read the relative offset word found at the beginning of 2nd and beyond records.
+ */
long relativeOffsetWord = valueStore.writeBuffers.readVLong(readPos);
long relativeOffset =
@@ -246,25 +264,31 @@ public class VectorMapJoinFastValueStore {
nextAbsoluteValueOffset = absoluteValueOffset - relativeOffset;
+ valueLength =
+ (int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift);
+ boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn);
+
+ /*
+ * Optionally, read current value's big length. {Big Value Len} {Big Value Bytes}
+ * Since this is the first record, the valueRefWord directs us.
+ */
+ if (!isValueLengthSmall) {
+ valueLength = valueStore.writeBuffers.readVInt(readPos);
+ }
+
isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0);
isNextValueLengthSmall =
((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0);
- }
- valueLength =
- (int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift);
- boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn);
- if (!isValueLengthSmall) {
- // And, if current value is big we must read it.
- valueLength = valueStore.writeBuffers.readVInt(readPos);
- }
-
- // 2nd and beyond have the next value's small length in the current record.
- if (isNextValueLengthSmall) {
- nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos);
- } else {
- nextSmallValueLength = -1;
- }
+ /*
+ * Optionally, the next value's small length could be a 2nd integer...
+ */
+ if (isNextValueLengthSmall) {
+ nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos);
+ } else {
+ nextSmallValueLength = -1;
+ }
+ }
} else {
if (isNextEof) {
@@ -277,24 +301,37 @@ public class VectorMapJoinFastValueStore {
valueStore.writeBuffers.setReadPoint(absoluteValueOffset, readPos);
if (isNextLast) {
+ /*
+ * No relativeOffsetWord in last value. (This was the first value written.)
+ */
isNextEof = true;
if (isNextValueLengthSmall) {
+
+ // {Small Value Bytes}
valueLength = nextSmallValueLength;
} else {
- valueLength = (int) valueStore.writeBuffers.readVLong(readPos);
+
+ // {Big Value Len} {Big Value Bytes}
+ valueLength = valueStore.writeBuffers.readVInt(readPos);
}
} else {
+ /*
+ * {Rel Offset Word} [Big Value Len] [Next Value Small Len] {Value Bytes}
+ *
+ * 2nd and beyond records have a relative offset word at the beginning.
+ */
isNextEof = false;
- // 2nd and beyond records have a relative offset word at the beginning.
long relativeOffsetWord = valueStore.writeBuffers.readVLong(readPos);
- // Read current value's big length now, if necessary.
+ /*
+ * Optionally, read current value's big length. {Big Value Len} {Big Value Bytes}
+ */
if (isNextValueLengthSmall) {
valueLength = nextSmallValueLength;
} else {
- valueLength = (int) valueStore.writeBuffers.readVLong(readPos);
+ valueLength = valueStore.writeBuffers.readVInt(readPos);
}
long relativeOffset =
@@ -305,9 +342,13 @@ public class VectorMapJoinFastValueStore {
isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0);
isNextValueLengthSmall =
((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0);
+
+ /*
+ * Optionally, the next value's small length could be a 2nd integer in the value's
+ * information.
+ */
if (isNextValueLengthSmall) {
- // TODO: Write readVInt
- nextSmallValueLength = (int) valueStore.writeBuffers.readVLong(readPos);
+ nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos);
} else {
nextSmallValueLength = -1;
}
@@ -396,6 +437,51 @@ public class VectorMapJoinFastValueStore {
private static final long flagOnMask = 1L << bitShift;
}
+ private static String valueRefWordToString(long valueRef) {
+ StringBuilder sb = new StringBuilder();
+
+ sb.append(Long.toHexString(valueRef));
+ sb.append(", ");
+ if ((valueRef & IsInvalidFlag.flagOnMask) != 0) {
+ sb.append("(Invalid optimized hash table reference), ");
+ }
+ /*
+ * Extract information.
+ */
+ long absoluteValueOffset =
+ (valueRef & AbsoluteValueOffset.bitMask);
+ int smallValueLength =
+ (int) ((valueRef & SmallValueLength.bitMask) >> SmallValueLength.bitShift);
+ boolean isValueLengthSmall = (smallValueLength != SmallValueLength.allBitsOn);
+ int cappedCount =
+ (int) ((valueRef & CappedCount.bitMask) >> CappedCount.bitShift);
+ boolean isValueLast =
+ ((valueRef & IsLastFlag.flagOnMask) != 0);
+
+ sb.append("absoluteValueOffset ");
+ sb.append(absoluteValueOffset);
+ sb.append(" (");
+ sb.append(Long.toHexString(absoluteValueOffset));
+ sb.append("), ");
+
+ if (isValueLengthSmall) {
+ sb.append("smallValueLength ");
+ sb.append(smallValueLength);
+ sb.append(", ");
+ } else {
+ sb.append("isValueLengthSmall = false, ");
+ }
+
+ sb.append("cappedCount ");
+ sb.append(cappedCount);
+ sb.append(", ");
+
+ sb.append("isValueLast ");
+ sb.append(isValueLast);
+
+ return sb.toString();
+ }
+
/**
* Relative Offset Word stored at the beginning of all but the last value that has a
* relative offset and 2 flags.
@@ -431,6 +517,33 @@ public class VectorMapJoinFastValueStore {
private static final long bitMask = allBitsOn << bitShift;
}
+ private static String relativeOffsetWordToString(long relativeOffsetWord) {
+ StringBuilder sb = new StringBuilder();
+
+ sb.append(Long.toHexString(relativeOffsetWord));
+ sb.append(", ");
+
+ long nextRelativeOffset =
+ (relativeOffsetWord & NextRelativeValueOffset.bitMask) >> NextRelativeValueOffset.bitShift;
+ sb.append("nextRelativeOffset ");
+ sb.append(nextRelativeOffset);
+ sb.append(" (");
+ sb.append(Long.toHexString(nextRelativeOffset));
+ sb.append("), ");
+
+ boolean isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0);
+ sb.append("isNextLast ");
+ sb.append(isNextLast);
+ sb.append(", ");
+
+ boolean isNextValueLengthSmall =
+ ((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0);
+ sb.append("isNextValueLengthSmall ");
+ sb.append(isNextValueLengthSmall);
+
+ return sb.toString();
+ }
+
public long addFirst(byte[] valueBytes, int valueStart, int valueLength) {
// First value is written without: next relative offset, next value length, is next value last
@@ -473,8 +586,6 @@ public class VectorMapJoinFastValueStore {
valueRefWord |= SmallValueLength.allBitsOnBitShifted;
}
- // LOG.debug("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord));
-
// The lower bits are the absolute value offset.
valueRefWord |= newAbsoluteOffset;
@@ -499,8 +610,6 @@ public class VectorMapJoinFastValueStore {
boolean isOldValueLast =
((oldValueRef & IsLastFlag.flagOnMask) != 0);
- // LOG.debug("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef));
-
/*
* Write information about the old value (which becomes our next) at the beginning
* of our new value.
@@ -523,12 +632,6 @@ public class VectorMapJoinFastValueStore {
writeBuffers.writeVLong(relativeOffsetWord);
- // When the next value is small it was not recorded with the old (i.e. next) value and we
- // have to remember it.
- if (isOldValueLengthSmall) {
- writeBuffers.writeVInt(oldSmallValueLength);
- }
-
// Now, we have written all information about the next value, work on the *new* value.
long newValueRef = ((long) newCappedCount) << CappedCount.bitShift;
@@ -536,18 +639,28 @@ public class VectorMapJoinFastValueStore {
if (!isNewValueSmall) {
// Use magic value to indicating we are writing the big value length.
newValueRef |= ((long) SmallValueLength.allBitsOn << SmallValueLength.bitShift);
+ Preconditions.checkState(
+ (int) ((newValueRef & SmallValueLength.bitMask) >> SmallValueLength.bitShift) ==
+ SmallValueLength.allBitsOn);
writeBuffers.writeVInt(valueLength);
+
} else {
// Caller must remember small value length.
newValueRef |= ((long) valueLength) << SmallValueLength.bitShift;
}
+
+ // When the next value is small it was not recorded with the old (i.e. next) value and we
+ // have to remember it.
+ if (isOldValueLengthSmall) {
+
+ writeBuffers.writeVInt(oldSmallValueLength);
+ }
+
writeBuffers.write(valueBytes, valueStart, valueLength);
// The lower bits are the absolute value offset.
newValueRef |= newAbsoluteOffset;
- // LOG.debug("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef));
-
return newValueRef;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java
deleted file mode 100644
index 2d4baa0..0000000
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java
+++ /dev/null
@@ -1,388 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector;
-
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-
-import junit.framework.TestCase;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
-import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.common.type.RandomTypeUtil;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hive.common.util.DateUtils;
-
-/**
- * Generate object inspector and random row object[].
- */
-public class RandomRowObjectSource {
-
- private Random r;
-
- private int columnCount;
-
- private List<String> typeNames;
-
- private PrimitiveCategory[] primitiveCategories;
-
- private PrimitiveTypeInfo[] primitiveTypeInfos;
-
- private List<ObjectInspector> primitiveObjectInspectorList;
-
- private StructObjectInspector rowStructObjectInspector;
-
- public List<String> typeNames() {
- return typeNames;
- }
-
- public PrimitiveCategory[] primitiveCategories() {
- return primitiveCategories;
- }
-
- public PrimitiveTypeInfo[] primitiveTypeInfos() {
- return primitiveTypeInfos;
- }
-
- public StructObjectInspector rowStructObjectInspector() {
- return rowStructObjectInspector;
- }
-
- public void init(Random r) {
- this.r = r;
- chooseSchema();
- }
-
- private static String[] possibleHiveTypeNames = {
- "boolean",
- "tinyint",
- "smallint",
- "int",
- "bigint",
- "date",
- "float",
- "double",
- "string",
- "char",
- "varchar",
- "binary",
- "date",
- "timestamp",
- serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME,
- serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME,
- "decimal"
- };
-
- private void chooseSchema() {
- columnCount = 1 + r.nextInt(20);
- typeNames = new ArrayList<String>(columnCount);
- primitiveCategories = new PrimitiveCategory[columnCount];
- primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
- primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
- List<String> columnNames = new ArrayList<String>(columnCount);
- for (int c = 0; c < columnCount; c++) {
- columnNames.add(String.format("col%d", c));
- int typeNum = r.nextInt(possibleHiveTypeNames.length);
- String typeName = possibleHiveTypeNames[typeNum];
- if (typeName.equals("char")) {
- int maxLength = 1 + r.nextInt(100);
- typeName = String.format("char(%d)", maxLength);
- } else if (typeName.equals("varchar")) {
- int maxLength = 1 + r.nextInt(100);
- typeName = String.format("varchar(%d)", maxLength);
- } else if (typeName.equals("decimal")) {
- typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
- }
- PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
- primitiveTypeInfos[c] = primitiveTypeInfo;
- PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
- primitiveCategories[c] = primitiveCategory;
- primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
- typeNames.add(typeName);
- }
- rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
- }
-
- public Object[][] randomRows(int n) {
- Object[][] result = new Object[n][];
- for (int i = 0; i < n; i++) {
- result[i] = randomRow();
- }
- return result;
- }
-
- public Object[] randomRow() {
- Object row[] = new Object[columnCount];
- for (int c = 0; c < columnCount; c++) {
- Object object = randomObject(c);
- if (object == null) {
- throw new Error("Unexpected null for column " + c);
- }
- row[c] = getWritableObject(c, object);
- if (row[c] == null) {
- throw new Error("Unexpected null for writable for column " + c);
- }
- }
- return row;
- }
-
- public Object getWritableObject(int column, Object object) {
- ObjectInspector objectInspector = primitiveObjectInspectorList.get(column);
- PrimitiveCategory primitiveCategory = primitiveCategories[column];
- PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
- switch (primitiveCategory) {
- case BOOLEAN:
- return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object);
- case BYTE:
- return ((WritableByteObjectInspector) objectInspector).create((byte) object);
- case SHORT:
- return ((WritableShortObjectInspector) objectInspector).create((short) object);
- case INT:
- return ((WritableIntObjectInspector) objectInspector).create((int) object);
- case LONG:
- return ((WritableLongObjectInspector) objectInspector).create((long) object);
- case DATE:
- return ((WritableDateObjectInspector) objectInspector).create((Date) object);
- case FLOAT:
- return ((WritableFloatObjectInspector) objectInspector).create((float) object);
- case DOUBLE:
- return ((WritableDoubleObjectInspector) objectInspector).create((double) object);
- case STRING:
- return ((WritableStringObjectInspector) objectInspector).create((String) object);
- case CHAR:
- {
- WritableHiveCharObjectInspector writableCharObjectInspector =
- new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo);
- return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1));
- }
- case VARCHAR:
- {
- WritableHiveVarcharObjectInspector writableVarcharObjectInspector =
- new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo);
- return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1));
- }
- case BINARY:
- return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY);
- case TIMESTAMP:
- return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0));
- case INTERVAL_YEAR_MONTH:
- return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0));
- case INTERVAL_DAY_TIME:
- return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0));
- case DECIMAL:
- {
- WritableHiveDecimalObjectInspector writableDecimalObjectInspector =
- new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo);
- return writableDecimalObjectInspector.create(HiveDecimal.ZERO);
- }
- default:
- throw new Error("Unknown primitive category " + primitiveCategory);
- }
- }
-
- public Object randomObject(int column) {
- PrimitiveCategory primitiveCategory = primitiveCategories[column];
- PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
- switch (primitiveCategory) {
- case BOOLEAN:
- return Boolean.valueOf(r.nextInt(1) == 1);
- case BYTE:
- return Byte.valueOf((byte) r.nextInt());
- case SHORT:
- return Short.valueOf((short) r.nextInt());
- case INT:
- return Integer.valueOf(r.nextInt());
- case LONG:
- return Long.valueOf(r.nextLong());
- case DATE:
- return getRandDate(r);
- case FLOAT:
- return Float.valueOf(r.nextFloat() * 10 - 5);
- case DOUBLE:
- return Double.valueOf(r.nextDouble() * 10 - 5);
- case STRING:
- return getRandString(r);
- case CHAR:
- return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo);
- case VARCHAR:
- return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo);
- case BINARY:
- return getRandBinary(r, 1 + r.nextInt(100));
- case TIMESTAMP:
- return RandomTypeUtil.getRandTimestamp(r);
- case INTERVAL_YEAR_MONTH:
- return getRandIntervalYearMonth(r);
- case INTERVAL_DAY_TIME:
- return getRandIntervalDayTime(r);
- case DECIMAL:
- return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
- default:
- throw new Error("Unknown primitive category " + primitiveCategory);
- }
- }
-
- public static String getRandString(Random r) {
- return getRandString(r, null, r.nextInt(10));
- }
-
- public static String getRandString(Random r, String characters, int length) {
- if (characters == null) {
- characters = "ABCDEFGHIJKLMabcdefghijklm";
- }
- StringBuilder sb = new StringBuilder();
- sb.append("");
- for (int i = 0; i < length; i++) {
- if (characters == null) {
- sb.append((char) (r.nextInt(128)));
- } else {
- sb.append(characters.charAt(r.nextInt(characters.length())));
- }
- }
- return sb.toString();
- }
-
- public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
- int maxLength = 1 + r.nextInt(charTypeInfo.getLength());
- String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
- HiveChar hiveChar = new HiveChar(randomString, maxLength);
- return hiveChar;
- }
-
- public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
- int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
- String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
- HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
- return hiveVarchar;
- }
-
- public static byte[] getRandBinary(Random r, int len){
- byte[] bytes = new byte[len];
- for (int j = 0; j < len; j++){
- bytes[j] = Byte.valueOf((byte) r.nextInt());
- }
- return bytes;
- }
-
- private static final String DECIMAL_CHARS = "0123456789";
-
- public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) {
- while (true) {
- StringBuilder sb = new StringBuilder();
- int precision = 1 + r.nextInt(18);
- int scale = 0 + r.nextInt(precision + 1);
-
- int integerDigits = precision - scale;
-
- if (r.nextBoolean()) {
- sb.append("-");
- }
-
- if (integerDigits == 0) {
- sb.append("0");
- } else {
- sb.append(getRandString(r, DECIMAL_CHARS, integerDigits));
- }
- if (scale != 0) {
- sb.append(".");
- sb.append(getRandString(r, DECIMAL_CHARS, scale));
- }
-
- HiveDecimal bd = HiveDecimal.create(sb.toString());
- if (bd.scale() > bd.precision()) {
- // Sometimes weird decimals are produced?
- continue;
- }
-
- return bd;
- }
- }
-
- public static Date getRandDate(Random r) {
- String dateStr = String.format("%d-%02d-%02d",
- Integer.valueOf(1800 + r.nextInt(500)), // year
- Integer.valueOf(1 + r.nextInt(12)), // month
- Integer.valueOf(1 + r.nextInt(28))); // day
- Date dateVal = Date.valueOf(dateStr);
- return dateVal;
- }
-
- public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) {
- String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
- String intervalYearMonthStr = String.format("%s%d-%d",
- yearMonthSignStr,
- Integer.valueOf(1800 + r.nextInt(500)), // year
- Integer.valueOf(0 + r.nextInt(12))); // month
- HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr);
- TestCase.assertTrue(intervalYearMonthVal != null);
- return intervalYearMonthVal;
- }
-
- public static HiveIntervalDayTime getRandIntervalDayTime(Random r) {
- String optionalNanos = "";
- if (r.nextInt(2) == 1) {
- optionalNanos = String.format(".%09d",
- Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC)));
- }
- String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-";
- String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s",
- yearMonthSignStr,
- Integer.valueOf(1 + r.nextInt(28)), // day
- Integer.valueOf(0 + r.nextInt(24)), // hour
- Integer.valueOf(0 + r.nextInt(60)), // minute
- Integer.valueOf(0 + r.nextInt(60)), // second
- optionalNanos);
- HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr);
- TestCase.assertTrue(intervalDayTimeVal != null);
- return intervalDayTimeVal;
- }
-}
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
index 959a2af..c55d951 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
@@ -19,13 +19,10 @@
package org.apache.hadoop.hive.ql.exec.vector;
import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
import java.util.Random;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
import junit.framework.TestCase;
@@ -50,7 +47,7 @@ public class TestVectorRowObject extends TestCase {
}
}
- void testVectorRowObject(int caseNum, Random r) throws HiveException {
+ void testVectorRowObject(int caseNum, boolean sort, Random r) throws HiveException {
String[] emptyScratchTypeNames = new String[0];
@@ -74,6 +71,9 @@ public class TestVectorRowObject extends TestCase {
vectorExtractRow.init(source.typeNames());
Object[][] randomRows = source.randomRows(100000);
+ if (sort) {
+ source.sort(randomRows);
+ }
int firstRandomRowIndex = 0;
for (int i = 0; i < randomRows.length; i++) {
Object[] row = randomRows[i];
@@ -93,14 +93,22 @@ public class TestVectorRowObject extends TestCase {
public void testVectorRowObject() throws Throwable {
- try {
- Random r = new Random(5678);
- for (int c = 0; c < 10; c++) {
- testVectorRowObject(c, r);
+ try {
+ Random r = new Random(5678);
+
+ int caseNum = 0;
+ for (int i = 0; i < 10; i++) {
+ testVectorRowObject(caseNum, false, r);
+ caseNum++;
+ }
+
+ // Try one sorted.
+ testVectorRowObject(caseNum, true, r);
+ caseNum++;
+
+ } catch (Throwable e) {
+ e.printStackTrace();
+ throw e;
}
- } catch (Throwable e) {
- e.printStackTrace();
- throw e;
- }
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
index e37d2bf..da69ee3 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
@@ -22,8 +22,6 @@ import java.io.IOException;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
import java.util.Properties;
import java.util.Random;
@@ -50,6 +48,7 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
+import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead;
@@ -62,7 +61,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
import org.apache.hadoop.io.BooleanWritable;
@@ -86,7 +84,7 @@ public class TestVectorSerDeRow extends TestCase {
LAZY_SIMPLE
}
- void deserializeAndVerify(Output output, DeserializeRead deserializeRead,
+ void deserializeAndVerify(Output output, DeserializeRead deserializeRead,
RandomRowObjectSource source, Object[] expectedRow)
throws HiveException, IOException {
deserializeRead.set(output.getData(), 0, output.getLength());
@@ -523,7 +521,7 @@ public class TestVectorSerDeRow extends TestCase {
// Set the configuration parameters
tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
-
+
tbl.setProperty("columns", fieldNames);
tbl.setProperty("columns.types", fieldTypes);
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
new file mode 100644
index 0000000..3a23584
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
@@ -0,0 +1,721 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Random;
+import java.util.TreeMap;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult;
+import org.apache.hadoop.hive.serde2.WriteBuffers;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.WritableComparator;
+
+import com.google.common.base.Preconditions;
+
+import static org.junit.Assert.*;
+
+public class CheckFastHashTable {
+
+ /**
+  * Searches the not-yet-taken actual values for one that StringExpr.compare reports as
+  * equal (result 0) to valueBytes, and marks the first such entry as taken.
+  *
+  * @param valueBytes   the bytes to look for
+  * @param actualValues candidate values; only indexes 0..actualCount-1 are examined
+  * @param actualCount  number of candidates to examine
+  * @param taken        per-candidate flags; the flag of the matched candidate is set to true
+  * @return true if an untaken candidate matched, false otherwise
+  */
+ public static boolean findMatch(byte[] valueBytes, List<byte[]> actualValues, int actualCount, boolean[] taken) {
+   for (int candidate = 0; candidate < actualCount; candidate++) {
+     // A candidate that already satisfied an earlier value cannot be reused.
+     if (taken[candidate]) {
+       continue;
+     }
+     byte[] candidateBytes = actualValues.get(candidate);
+     int comparison =
+         StringExpr.compare(valueBytes, 0, valueBytes.length, candidateBytes, 0, candidateBytes.length);
+     if (comparison == 0) {
+       taken[candidate] = true;
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /**
+  * Verifies that a hash map lookup result contains exactly the expected values, in any order.
+  *
+  * Every value reachable through first()/next() is copied out, the number of values is
+  * compared with values.size(), and then each expected value is paired with a distinct
+  * actual value via findMatch. Any discrepancy fails the test through TestCase.fail.
+  *
+  * @param hashMapResult the lookup result to read the actual values from
+  * @param values        the expected values
+  */
+ public static void verifyHashMapValues(VectorMapJoinHashMapResult hashMapResult,
+     List<byte[]> values) {
+
+   int valueCount = values.size();
+
+   // Read through all values. Looping on ref != null means a result whose first()
+   // yields null is reported as a count mismatch below rather than as a NullPointerException.
+   List<byte[]> actualValues = new ArrayList<byte[]>();
+   for (WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
+       ref != null;
+       ref = hashMapResult.next()) {
+     byte[] bytes = ref.getBytes();
+     int offset = (int) ref.getOffset();
+     int length = ref.getLength();
+
+     if (length == 0) {
+       actualValues.add(new byte[0]);
+     } else {
+       // Copy, because the segment only references a range of a larger buffer.
+       actualValues.add(Arrays.copyOfRange(bytes, offset, offset + length));
+     }
+   }
+
+   int actualCount = actualValues.size();
+
+   if (valueCount != actualCount) {
+     TestCase.fail("values.size() " + valueCount + " does not match actualCount " + actualCount);
+   }
+
+   // Each actual value may satisfy at most one expected value.
+   boolean[] taken = new boolean[actualCount];
+
+   for (byte[] valueBytes : values) {
+     if (!findMatch(valueBytes, actualValues, actualCount, taken)) {
+       // Report the lengths of the actual values that are still unmatched to aid debugging.
+       List<Integer> availableLengths = new ArrayList<Integer>();
+       for (int a = 0; a < actualCount; a++) {
+         if (!taken[a]) {
+           availableLengths.add(actualValues.get(a).length);
+         }
+       }
+       TestCase.fail("No match for expected value (valueBytes length " + valueBytes.length +
+           ", availableLengths " + availableLengths.toString() + " of " + actualCount + " total)");
+     }
+   }
+ }
+
+ /**
+  * Element for Key: Long x Hash Table: HashMap.
+  *
+  * Holds one long key together with every value that has been added for it.
+  */
+ public static class FastLongHashMapElement {
+   private final long key;
+   private final List<byte[]> values = new ArrayList<byte[]>();
+
+   /**
+    * @param key        the key of this element
+    * @param firstValue the first value recorded for the key
+    */
+   public FastLongHashMapElement(long key, byte[] firstValue) {
+     this.key = key;
+     this.values.add(firstValue);
+   }
+
+   /** @return the key of this element */
+   public long getKey() {
+     return this.key;
+   }
+
+   /** @return how many values have been recorded for the key */
+   public int getValueCount() {
+     return this.values.size();
+   }
+
+   /** @return the live list of recorded values (not a copy) */
+   public List<byte[]> getValues() {
+     return this.values;
+   }
+
+   /** Records one more value for the key. */
+   public void addValue(byte[] value) {
+     this.values.add(value);
+   }
+ }
+
+ /**
+  * Verify table for Key: Long x Hash Table: HashMap.
+  *
+  * Records every key/value pair the test adds so that verify() can later look each key up
+  * in a VectorMapJoinFastLongHashMap and compare the values it returns.
+  */
+ public static class VerifyFastLongHashMap {
+
+   // Number of distinct keys recorded; also the next free slot in array.
+   private int count;
+
+   // Elements in order of first insertion; starts at 50 slots and doubles when full.
+   private FastLongHashMapElement[] array;
+
+   // Maps each key to the index of its element in array.
+   private HashMap<Long, Integer> keyValueMap;
+
+   public VerifyFastLongHashMap() {
+     count = 0;
+     array = new FastLongHashMapElement[50];
+     keyValueMap = new HashMap<Long, Integer>();
+   }
+
+   /** @return the number of distinct keys recorded */
+   public int getCount() {
+     return count;
+   }
+
+   /** @return true if the key has been recorded */
+   public boolean contains(long key) {
+     return keyValueMap.containsKey(key);
+   }
+
+   /**
+    * Records a value for the key, creating a new element if the key is new.
+    */
+   public void add(long key, byte[] value) {
+     // Single lookup: indexes stored in the map are never null, so null means "absent".
+     Integer existingIndex = keyValueMap.get(key);
+     if (existingIndex != null) {
+       array[existingIndex].addValue(value);
+       return;
+     }
+     if (count >= array.length) {
+       // Grow.
+       array = Arrays.copyOf(array, array.length * 2);
+     }
+     array[count] = new FastLongHashMapElement(key, value);
+     keyValueMap.put(key, count);
+     count++;
+   }
+
+   /**
+    * Records the value under a randomly chosen existing key.
+    *
+    * @return the key that was chosen
+    * @throws IllegalStateException (from Preconditions.checkState) if no key has been recorded
+    */
+   public long addRandomExisting(byte[] value, Random r) {
+     Preconditions.checkState(count > 0);
+     int index = r.nextInt(count);
+     array[index].addValue(value);
+     return array[index].getKey();
+   }
+
+   /** @return the key stored at the given insertion index */
+   public long getKey(int index) {
+     return array[index].getKey();
+   }
+
+   /** @return the values recorded for the key at the given insertion index */
+   public List<byte[]> getValues(int index) {
+     return array[index].getValues();
+   }
+
+   /**
+    * Checks that the map reports the recorded number of keys and that looking up each
+    * recorded key yields MATCH with exactly the recorded values.
+    */
+   public void verify(VectorMapJoinFastLongHashMap map) {
+     int mapSize = map.size();
+     if (mapSize != count) {
+       TestCase.fail("map.size() " + mapSize + " does not match expected count " + count);
+     }
+
+     for (int index = 0; index < count; index++) {
+       FastLongHashMapElement element = array[index];
+       long key = element.getKey();
+       List<byte[]> values = element.getValues();
+
+       VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
+       JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult);
+       if (joinResult != JoinUtil.JoinResult.MATCH) {
+         TestCase.fail("lookup of key " + key + " (index " + index + ") returned " + joinResult +
+             " instead of MATCH");
+       }
+
+       verifyHashMapValues(hashMapResult, values);
+     }
+   }
+ }
+
+ /**
+  * Element for Key: byte[] x Hash Table: HashMap.
+  *
+  * Holds one byte-array key together with every value that has been added for it.
+  * The key array is stored by reference, not copied.
+  */
+ public static class FastBytesHashMapElement {
+   private final byte[] key;
+   private final List<byte[]> values = new ArrayList<byte[]>();
+
+   /**
+    * @param key        the key of this element
+    * @param firstValue the first value recorded for the key
+    */
+   public FastBytesHashMapElement(byte[] key, byte[] firstValue) {
+     this.key = key;
+     this.values.add(firstValue);
+   }
+
+   /** @return the key array passed to the constructor */
+   public byte[] getKey() {
+     return this.key;
+   }
+
+   /** @return how many values have been recorded for the key */
+   public int getValueCount() {
+     return this.values.size();
+   }
+
+   /** @return the live list of recorded values (not a copy) */
+   public List<byte[]> getValues() {
+     return this.values;
+   }
+
+   /** Records one more value for the key. */
+   public void addValue(byte[] value) {
+     this.values.add(value);
+   }
+ }
+
+ /**
+  * Verify table for Key: byte[] x Hash Table: HashMap.
+  *
+  * Records every key/value pair the test adds so that verify() can later look each key up
+  * in a VectorMapJoinFastBytesHashMap and compare the values it returns.
+  */
+ public static class VerifyFastBytesHashMap {
+
+   // Number of distinct keys recorded; also the next free slot in array.
+   private int count;
+
+   // Elements in order of first insertion; starts at 50 slots and doubles when full.
+   private FastBytesHashMapElement[] array;
+
+   // Maps each key (wrapped as BytesWritable) to the index of its element in array.
+   private TreeMap<BytesWritable, Integer> keyValueMap;
+
+   public VerifyFastBytesHashMap() {
+     count = 0;
+     array = new FastBytesHashMapElement[50];
+
+     // We use BytesWritable because it supports Comparable for our TreeMap.
+     keyValueMap = new TreeMap<BytesWritable, Integer>();
+   }
+
+   /** @return the number of distinct keys recorded */
+   public int getCount() {
+     return count;
+   }
+
+   /** @return true if the key has been recorded */
+   public boolean contains(byte[] key) {
+     BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+     return keyValueMap.containsKey(keyBytesWritable);
+   }
+
+   /**
+    * Records a value for the key, creating a new element if the key is new.
+    */
+   public void add(byte[] key, byte[] value) {
+     BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+     // Single lookup: indexes stored in the map are never null, so null means "absent".
+     Integer existingIndex = keyValueMap.get(keyBytesWritable);
+     if (existingIndex != null) {
+       array[existingIndex].addValue(value);
+       return;
+     }
+     if (count >= array.length) {
+       // Grow.
+       array = Arrays.copyOf(array, array.length * 2);
+     }
+     array[count] = new FastBytesHashMapElement(key, value);
+     keyValueMap.put(keyBytesWritable, count);
+     count++;
+   }
+
+   /**
+    * Records the value under a randomly chosen existing key.
+    *
+    * @return the key that was chosen
+    * @throws IllegalStateException (from Preconditions.checkState) if no key has been recorded
+    */
+   public byte[] addRandomExisting(byte[] value, Random r) {
+     Preconditions.checkState(count > 0);
+     int index = r.nextInt(count);
+     array[index].addValue(value);
+     return array[index].getKey();
+   }
+
+   /** @return the key stored at the given insertion index */
+   public byte[] getKey(int index) {
+     return array[index].getKey();
+   }
+
+   /** @return the values recorded for the key at the given insertion index */
+   public List<byte[]> getValues(int index) {
+     return array[index].getValues();
+   }
+
+   /**
+    * Checks that the map reports the recorded number of keys and that looking up each
+    * recorded key yields MATCH with exactly the recorded values.
+    */
+   public void verify(VectorMapJoinFastBytesHashMap map) {
+     int mapSize = map.size();
+     if (mapSize != count) {
+       TestCase.fail("map.size() " + mapSize + " does not match expected count " + count);
+     }
+
+     for (int index = 0; index < count; index++) {
+       FastBytesHashMapElement element = array[index];
+       byte[] key = element.getKey();
+       List<byte[]> values = element.getValues();
+
+       VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult();
+       JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult);
+       if (joinResult != JoinUtil.JoinResult.MATCH) {
+         TestCase.fail("lookup of key at index " + index + " (length " + key.length +
+             ") returned " + joinResult + " instead of MATCH");
+       }
+
+       verifyHashMapValues(hashMapResult, values);
+     }
+   }
+ }
+
+ /**
+  * Element for Key: Long x Hash Table: HashMultiSet.
+  *
+  * Holds one long key and the number of times it has been added.
+  */
+ public static class FastLongHashMultiSetElement {
+   private final long key;
+
+   // Starts at 1 because an element is created on the first add of its key.
+   private int multiSetCount = 1;
+
+   public FastLongHashMultiSetElement(long key) {
+     this.key = key;
+   }
+
+   /** @return the key of this element */
+   public long getKey() {
+     return this.key;
+   }
+
+   /** @return how many times the key has been counted so far */
+   public int getMultiSetCount() {
+     return this.multiSetCount;
+   }
+
+   /** Counts one more occurrence of the key. */
+   public void incrementMultiSetCount() {
+     this.multiSetCount += 1;
+   }
+ }
+
+ /**
+  * Verify table for Key: Long x Hash Table: HashMultiSet.
+  *
+  * Records how many times each key was added so that verify() can later query a
+  * VectorMapJoinFastLongHashMultiSet and compare the counts it reports.
+  */
+ public static class VerifyFastLongHashMultiSet {
+
+   // Number of distinct keys recorded; also the next free slot in array.
+   private int count;
+
+   // Elements in order of first insertion; starts at 50 slots and doubles when full.
+   private FastLongHashMultiSetElement[] array;
+
+   // Maps each key to the index of its element in array.
+   private HashMap<Long, Integer> keyValueMap;
+
+   public VerifyFastLongHashMultiSet() {
+     count = 0;
+     array = new FastLongHashMultiSetElement[50];
+     keyValueMap = new HashMap<Long, Integer>();
+   }
+
+   /** @return the number of distinct keys recorded */
+   public int getCount() {
+     return count;
+   }
+
+   /** @return true if the key has been recorded */
+   public boolean contains(long key) {
+     return keyValueMap.containsKey(key);
+   }
+
+   /**
+    * Counts one occurrence of the key, creating a new element if the key is new.
+    */
+   public void add(long key) {
+     // Single lookup: indexes stored in the map are never null, so null means "absent".
+     Integer existingIndex = keyValueMap.get(key);
+     if (existingIndex != null) {
+       array[existingIndex].incrementMultiSetCount();
+       return;
+     }
+     if (count >= array.length) {
+       // Grow.
+       array = Arrays.copyOf(array, array.length * 2);
+     }
+     array[count] = new FastLongHashMultiSetElement(key);
+     keyValueMap.put(key, count);
+     count++;
+   }
+
+   /**
+    * Counts one more occurrence of a randomly chosen existing key.
+    *
+    * @param value not used by this method
+    * @return the key that was chosen
+    * @throws IllegalStateException (from Preconditions.checkState) if no key has been recorded
+    */
+   public long addRandomExisting(byte[] value, Random r) {
+     Preconditions.checkState(count > 0);
+     int index = r.nextInt(count);
+     array[index].incrementMultiSetCount();
+     return array[index].getKey();
+   }
+
+   /** @return the key stored at the given insertion index */
+   public long getKey(int index) {
+     return array[index].getKey();
+   }
+
+   /** @return the recorded count for the key at the given insertion index */
+   public int getMultiSetCount(int index) {
+     return array[index].getMultiSetCount();
+   }
+
+   /**
+    * Checks that the multi-set reports the recorded number of keys and that each recorded
+    * key yields MATCH with the recorded count.
+    */
+   public void verify(VectorMapJoinFastLongHashMultiSet map) {
+     int mapSize = map.size();
+     if (mapSize != count) {
+       TestCase.fail("map.size() " + mapSize + " does not match expected count " + count);
+     }
+
+     for (int index = 0; index < count; index++) {
+       FastLongHashMultiSetElement element = array[index];
+       long key = element.getKey();
+       int multiSetCount = element.getMultiSetCount();
+
+       VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult();
+       JoinUtil.JoinResult joinResult = map.contains(key, hashMultiSetResult);
+       if (joinResult != JoinUtil.JoinResult.MATCH) {
+         TestCase.fail("contains for key " + key + " (index " + index + ") returned " +
+             joinResult + " instead of MATCH");
+       }
+
+       // Expected value first, so a failure message labels the two numbers correctly.
+       assertEquals("multi-set count for key " + key, multiSetCount, hashMultiSetResult.count());
+     }
+   }
+ }
+
+ /**
+  * Element for Key: byte[] x Hash Table: HashMultiSet.
+  *
+  * Holds one byte-array key (stored by reference) and the number of times it has been added.
+  */
+ public static class FastBytesHashMultiSetElement {
+   private final byte[] key;
+
+   // Starts at 1 because an element is created on the first add of its key.
+   private int multiSetCount = 1;
+
+   public FastBytesHashMultiSetElement(byte[] key) {
+     this.key = key;
+   }
+
+   /** @return the key array passed to the constructor */
+   public byte[] getKey() {
+     return this.key;
+   }
+
+   /** @return how many times the key has been counted so far */
+   public int getMultiSetCount() {
+     return this.multiSetCount;
+   }
+
+   /** Counts one more occurrence of the key. */
+   public void incrementMultiSetCount() {
+     this.multiSetCount += 1;
+   }
+ }
+
+ /**
+  * Verify table for Key: byte[] x Hash Table: HashMultiSet.
+  *
+  * Records how many times each key was added so that verify() can later query a
+  * VectorMapJoinFastBytesHashMultiSet and compare the counts it reports.
+  */
+ public static class VerifyFastBytesHashMultiSet {
+
+   // Number of distinct keys recorded; also the next free slot in array.
+   private int count;
+
+   // Elements in order of first insertion; starts at 50 slots and doubles when full.
+   private FastBytesHashMultiSetElement[] array;
+
+   // Maps each key (wrapped as BytesWritable) to the index of its element in array.
+   private TreeMap<BytesWritable, Integer> keyValueMap;
+
+   public VerifyFastBytesHashMultiSet() {
+     count = 0;
+     array = new FastBytesHashMultiSetElement[50];
+
+     // We use BytesWritable because it supports Comparable for our TreeMap.
+     keyValueMap = new TreeMap<BytesWritable, Integer>();
+   }
+
+   /** @return the number of distinct keys recorded */
+   public int getCount() {
+     return count;
+   }
+
+   /** @return true if the key has been recorded */
+   public boolean contains(byte[] key) {
+     BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+     return keyValueMap.containsKey(keyBytesWritable);
+   }
+
+   /**
+    * Counts one occurrence of the key, creating a new element if the key is new.
+    */
+   public void add(byte[] key) {
+     BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+     // Single lookup: indexes stored in the map are never null, so null means "absent".
+     Integer existingIndex = keyValueMap.get(keyBytesWritable);
+     if (existingIndex != null) {
+       array[existingIndex].incrementMultiSetCount();
+       return;
+     }
+     if (count >= array.length) {
+       // Grow.
+       array = Arrays.copyOf(array, array.length * 2);
+     }
+     array[count] = new FastBytesHashMultiSetElement(key);
+     keyValueMap.put(keyBytesWritable, count);
+     count++;
+   }
+
+   /**
+    * Counts one more occurrence of a randomly chosen existing key.
+    *
+    * @param value not used by this method
+    * @return the key that was chosen
+    * @throws IllegalStateException (from Preconditions.checkState) if no key has been recorded
+    */
+   public byte[] addRandomExisting(byte[] value, Random r) {
+     Preconditions.checkState(count > 0);
+     int index = r.nextInt(count);
+     array[index].incrementMultiSetCount();
+     return array[index].getKey();
+   }
+
+   /** @return the key stored at the given insertion index */
+   public byte[] getKey(int index) {
+     return array[index].getKey();
+   }
+
+   /** @return the recorded count for the key at the given insertion index */
+   public int getMultiSetCount(int index) {
+     return array[index].getMultiSetCount();
+   }
+
+   /**
+    * Checks that the multi-set reports the recorded number of keys and that each recorded
+    * key yields MATCH with the recorded count.
+    */
+   public void verify(VectorMapJoinFastBytesHashMultiSet map) {
+     int mapSize = map.size();
+     if (mapSize != count) {
+       TestCase.fail("map.size() " + mapSize + " does not match expected count " + count);
+     }
+
+     for (int index = 0; index < count; index++) {
+       FastBytesHashMultiSetElement element = array[index];
+       byte[] key = element.getKey();
+       int multiSetCount = element.getMultiSetCount();
+
+       VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult();
+       JoinUtil.JoinResult joinResult = map.contains(key, 0, key.length, hashMultiSetResult);
+       if (joinResult != JoinUtil.JoinResult.MATCH) {
+         TestCase.fail("contains for key at index " + index + " (length " + key.length +
+             ") returned " + joinResult + " instead of MATCH");
+       }
+
+       // Expected value first, so a failure message labels the two numbers correctly.
+       assertEquals("multi-set count for key at index " + index,
+           multiSetCount, hashMultiSetResult.count());
+     }
+   }
+ }
+
+ /**
+  * Element for Key: Long x Hash Table: HashSet.
+  *
+  * Wraps a single long key.
+  */
+ public static class FastLongHashSetElement {
+   private final long key;
+
+   public FastLongHashSetElement(long key) {
+     this.key = key;
+   }
+
+   /** @return the key of this element */
+   public long getKey() {
+     return this.key;
+   }
+ }
+
+ /**
+  * Verify table for Key: Long x Hash Table: HashSet.
+  *
+  * Records every distinct key the test adds so that verify() can later query a
+  * VectorMapJoinFastLongHashSet for each of them.
+  */
+ public static class VerifyFastLongHashSet {
+
+   // Number of distinct keys recorded; also the next free slot in array.
+   private int count;
+
+   // Elements in order of first insertion; starts at 50 slots and doubles when full.
+   private FastLongHashSetElement[] array;
+
+   // Maps each key to the index of its element in array.
+   private HashMap<Long, Integer> keyValueMap;
+
+   public VerifyFastLongHashSet() {
+     count = 0;
+     array = new FastLongHashSetElement[50];
+     keyValueMap = new HashMap<Long, Integer>();
+   }
+
+   /** @return the number of distinct keys recorded */
+   public int getCount() {
+     return count;
+   }
+
+   /** @return true if the key has been recorded */
+   public boolean contains(long key) {
+     return keyValueMap.containsKey(key);
+   }
+
+   /**
+    * Records the key; adding a key that is already present changes nothing.
+    */
+   public void add(long key) {
+     if (keyValueMap.containsKey(key)) {
+       // Already exists.
+       return;
+     }
+     if (count >= array.length) {
+       // Grow.
+       array = Arrays.copyOf(array, array.length * 2);
+     }
+     array[count] = new FastLongHashSetElement(key);
+     keyValueMap.put(key, count);
+     count++;
+   }
+
+   /**
+    * Picks a randomly chosen existing key; nothing is modified because the key is
+    * already in the set.
+    *
+    * @param value not used by this method
+    * @return the key that was chosen
+    * @throws IllegalStateException (from Preconditions.checkState) if no key has been recorded
+    */
+   public long addRandomExisting(byte[] value, Random r) {
+     Preconditions.checkState(count > 0);
+     int index = r.nextInt(count);
+
+     // Already exists.
+
+     return array[index].getKey();
+   }
+
+   /** @return the key stored at the given insertion index */
+   public long getKey(int index) {
+     return array[index].getKey();
+   }
+
+   /**
+    * Checks that the set reports the recorded number of keys and that each recorded key
+    * yields MATCH.
+    */
+   public void verify(VectorMapJoinFastLongHashSet map) {
+     int mapSize = map.size();
+     if (mapSize != count) {
+       TestCase.fail("map.size() " + mapSize + " does not match expected count " + count);
+     }
+
+     for (int index = 0; index < count; index++) {
+       FastLongHashSetElement element = array[index];
+       long key = element.getKey();
+
+       VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult();
+       JoinUtil.JoinResult joinResult = map.contains(key, hashSetResult);
+       if (joinResult != JoinUtil.JoinResult.MATCH) {
+         TestCase.fail("contains for key " + key + " (index " + index + ") returned " +
+             joinResult + " instead of MATCH");
+       }
+     }
+   }
+ }
+
+ /**
+  * Element for Key: byte[] x Hash Table: HashSet.
+  *
+  * Wraps a single byte-array key, stored by reference.
+  */
+ public static class FastBytesHashSetElement {
+   private final byte[] key;
+
+   public FastBytesHashSetElement(byte[] key) {
+     this.key = key;
+   }
+
+   /** @return the key array passed to the constructor */
+   public byte[] getKey() {
+     return this.key;
+   }
+ }
+
+ /**
+  * Verify table for Key: byte[] x Hash Table: HashSet.
+  *
+  * Records every distinct key the test adds so that verify() can later query a
+  * VectorMapJoinFastBytesHashSet for each of them.
+  */
+ public static class VerifyFastBytesHashSet {
+
+   // Number of distinct keys recorded; also the next free slot in array.
+   private int count;
+
+   // Elements in order of first insertion; starts at 50 slots and doubles when full.
+   private FastBytesHashSetElement[] array;
+
+   // Maps each key (wrapped as BytesWritable) to the index of its element in array.
+   private TreeMap<BytesWritable, Integer> keyValueMap;
+
+   public VerifyFastBytesHashSet() {
+     count = 0;
+     array = new FastBytesHashSetElement[50];
+
+     // We use BytesWritable because it supports Comparable for our TreeMap.
+     keyValueMap = new TreeMap<BytesWritable, Integer>();
+   }
+
+   /** @return the number of distinct keys recorded */
+   public int getCount() {
+     return count;
+   }
+
+   /** @return true if the key has been recorded */
+   public boolean contains(byte[] key) {
+     BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+     return keyValueMap.containsKey(keyBytesWritable);
+   }
+
+   /**
+    * Records the key; adding a key that is already present changes nothing.
+    */
+   public void add(byte[] key) {
+     BytesWritable keyBytesWritable = new BytesWritable(key, key.length);
+     if (keyValueMap.containsKey(keyBytesWritable)) {
+       // Already exists.
+       return;
+     }
+     if (count >= array.length) {
+       // Grow.
+       array = Arrays.copyOf(array, array.length * 2);
+     }
+     array[count] = new FastBytesHashSetElement(key);
+     keyValueMap.put(keyBytesWritable, count);
+     count++;
+   }
+
+   /**
+    * Picks a randomly chosen existing key; nothing is modified because the key is
+    * already in the set.
+    *
+    * @param value not used by this method
+    * @return the key that was chosen
+    * @throws IllegalStateException (from Preconditions.checkState) if no key has been recorded
+    */
+   public byte[] addRandomExisting(byte[] value, Random r) {
+     Preconditions.checkState(count > 0);
+     int index = r.nextInt(count);
+
+     // Already exists.
+
+     return array[index].getKey();
+   }
+
+   /** @return the key stored at the given insertion index */
+   public byte[] getKey(int index) {
+     return array[index].getKey();
+   }
+
+   /**
+    * Checks that the set reports the recorded number of keys and that each recorded key
+    * yields MATCH.
+    */
+   public void verify(VectorMapJoinFastBytesHashSet map) {
+     int mapSize = map.size();
+     if (mapSize != count) {
+       TestCase.fail("map.size() " + mapSize + " does not match expected count " + count);
+     }
+
+     for (int index = 0; index < count; index++) {
+       FastBytesHashSetElement element = array[index];
+       byte[] key = element.getKey();
+
+       VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult();
+       JoinUtil.JoinResult joinResult = map.contains(key, 0, key.length, hashSetResult);
+       if (joinResult != JoinUtil.JoinResult.MATCH) {
+         TestCase.fail("contains for key at index " + index + " (length " + key.length +
+             ") returned " + joinResult + " instead of MATCH");
+       }
+     }
+   }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java
index c2375e0..90e8f33 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java
@@ -18,16 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
import java.util.Random;
-import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
-import org.apache.hadoop.hive.serde2.WriteBuffers;
-
-import static org.junit.Assert.*;
-
public class CommonFastHashTable {
protected static final float LOAD_FACTOR = 0.75f;
@@ -39,6 +31,10 @@ public class CommonFastHashTable {
protected static final int LARGE_CAPACITY = 8388608;
protected static Random random;
+ protected static int MAX_KEY_LENGTH = 100;
+
+ protected static int MAX_VALUE_LENGTH = 1000;
+
public static int generateLargeCount() {
int count = 0;
if (random.nextInt(100) != 0) {
@@ -75,54 +71,4 @@ public class CommonFastHashTable {
}
return count;
}
- public static void verifyHashMapResult(VectorMapJoinHashMapResult hashMapResult,
- RandomByteArrayStream randomByteArrayStream ) {
-
- List<byte[]> resultBytes = new ArrayList<byte[]>();
- int count = 0;
- if (hashMapResult.hasRows()) {
- WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
- while (ref != null) {
- count++;
- byte[] bytes = ref.getBytes();
- int offset = (int) ref.getOffset();
- int length = ref.getLength();
- resultBytes.add(Arrays.copyOfRange(bytes, offset, offset + length));
- ref = hashMapResult.next();
- }
- } else {
- assertTrue(hashMapResult.isEof());
- }
- if (randomByteArrayStream.size() != count) {
- assertTrue(false);
- }
-
- for (int i = 0; i < count; ++i) {
- byte[] bytes = resultBytes.get(i);
- if (!randomByteArrayStream.contains(bytes)) {
- assertTrue(false);
- }
- }
- }
-
- public static void verifyHashMapResult(VectorMapJoinHashMapResult hashMapResult,
- byte[] valueBytes ) {
-
- assertTrue(hashMapResult.hasRows());
- WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
- byte[] bytes = ref.getBytes();
- int offset = (int) ref.getOffset();
- int length = ref.getLength();
- assertTrue(valueBytes.length == length);
- boolean match = true; // Assume
- for (int j = 0; j < length; j++) {
- if (valueBytes[j] != bytes[offset + j]) {
- match = false;
- break;
- }
- }
- if (!match) {
- assertTrue(false);
- }
- }
}
\ No newline at end of file