You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/10/06 04:09:08 UTC

[hive] branch master updated: HIVE-24205: Optimise CuckooSetBytes (Mustafa Iman via Rajesh Balamohan)

This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 56646de  HIVE-24205: Optimise CuckooSetBytes (Mustafa Iman via Rajesh Balamohan)
56646de is described below

commit 56646de06047320fa95387318be34ee80a123788
Author: Mustafa Iman <mu...@gmail.com>
AuthorDate: Fri Oct 2 16:35:44 2020 -0700

    HIVE-24205: Optimise CuckooSetBytes (Mustafa Iman via Rajesh Balamohan)
    
    Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
 .../hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java     | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
index f9a86ae..10e8332 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
@@ -41,6 +41,8 @@ public class CuckooSetBytes {
   private int rehashCount = 0;
   private static final long INT_MASK  = 0x00000000ffffffffL;
   private static final long BYTE_MASK = 0x00000000000000ffL;
+  private int maxLen;
+  private int minLen = Integer.MAX_VALUE;
 
   /**
    * Allocate a new set to hold expectedSize values. Re-allocation to expand
@@ -76,6 +78,9 @@ public class CuckooSetBytes {
    * and ending at start+len is present in the set.
    */
   public boolean lookup(byte[] b, int start, int len) {
+    if (len < minLen || len > maxLen) {
+      return false;
+    }
 
     return entryEqual(t1, h1(b, start, len), b, start, len)
         || entryEqual(t2, h2(b, start, len), b, start, len);
@@ -90,6 +95,8 @@ public class CuckooSetBytes {
     if (lookup(x, 0, x.length)) {
       return;
     }
+    minLen = Math.min(minLen, x.length);
+    maxLen = Math.max(maxLen, x.length);
 
     // Try to insert up to n times. Rehash if that fails.
     for(int i = 0; i != n; i++) {