You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/11/12 04:59:48 UTC

[24/55] [abbrv] hive git commit: HIVE-12288: Bloom-1 filters for Vectorized map-joins (Gopal V, reviewed by Matt McCline)

HIVE-12288: Bloom-1 filters for Vectorized map-joins (Gopal V, reviewed by Matt McCline)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b29705ed
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b29705ed
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b29705ed

Branch: refs/heads/spark
Commit: b29705ed73a936566a49360a5940c8b933f5e3db
Parents: b5654cc
Author: Gopal V <go...@apache.org>
Authored: Thu Nov 5 22:18:11 2015 -0800
Committer: Gopal V <go...@apache.org>
Committed: Thu Nov 5 22:18:11 2015 -0800

----------------------------------------------------------------------
 .../ql/exec/persistence/HybridHashTableContainer.java    | 11 +++++++++++
 1 file changed, 11 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b29705ed/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
index 632ba4f..a0c9b98 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
@@ -985,6 +985,17 @@ public class HybridHashTableContainer
       int keyHash = HashCodeUtil.murmurHash(bytes, offset, length);
       partitionId = keyHash & (hashPartitions.length - 1);
 
+      if (!bloom1.testLong(keyHash)) {
+        /*
+         * if the keyHash is missing in the bloom filter, then the value cannot exist in any of the
+         * spilled partition - return NOMATCH
+         */
+        dummyRow = null;
+        aliasFilter = (byte) 0xff;
+        hashMapResult.forget();
+        return JoinResult.NOMATCH;
+      }
+
       // If the target hash table is on disk, spill this row to disk as well to be processed later
       if (isOnDisk(partitionId)) {
         return JoinUtil.JoinResult.SPILL;