You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by go...@apache.org on 2015/08/10 22:56:22 UTC

hive git commit: HIVE-11457: Vectorization: Improve GenVectorCode string equals intrinsic (Gopal V, reviewed by Matt McCline)

Repository: hive
Updated Branches:
  refs/heads/master cfda5700a -> bddbd1da0


HIVE-11457: Vectorization: Improve GenVectorCode string equals intrinsic (Gopal V, reviewed by Matt McCline)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bddbd1da
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bddbd1da
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bddbd1da

Branch: refs/heads/master
Commit: bddbd1da0c570a4c03f80b695b940a181787c5ca
Parents: cfda570
Author: Gopal V <go...@apache.org>
Authored: Mon Aug 10 13:55:29 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Mon Aug 10 13:56:00 2015 -0700

----------------------------------------------------------------------
 .../ql/exec/vector/expressions/StringExpr.java  | 49 +++++++++++++++-----
 1 file changed, 38 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/bddbd1da/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
index ebeb642..90817a5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringExpr.java
@@ -51,22 +51,49 @@ public class StringExpr {
    * Use lexicographic unsigned byte value order.
    * This is what's used for UTF-8 sort order.
    */
-  public static boolean equal(byte[] arg1, int start1, int len1, byte[] arg2, int start2, int len2) {
+  public static boolean equal(byte[] arg1, final int start1, final int len1,
+      byte[] arg2, final int start2, final int len2) {
     if (len1 != len2) {
       return false;
     }
-    for (int index1 = start1,
-             index2 = start2;
-         len1 > 0;
-         len1--,
-         index1++,
-         index2++) {
-      // Note the "& 0xff" is just a way to convert unsigned bytes to signed integer.
-      if ((arg1[index1] & 0xff) != (arg2[index2] & 0xff)) {
-        return false;
-      }
+    if (len1 == 0) {
+      return true;
+    }
+
+    // do bounds check for OOB exception
+    if (arg1[start1] != arg2[start2]
+        || arg1[start1 + len1 - 1] != arg2[start2 + len2 - 1]) {
+      return false;
+    }
 
+    if (len1 == len2) {
+      // prove invariant to the compiler: len1 = len2
+      // all array access between (start1, start1+len1) 
+      // and (start2, start2+len2) are valid
+      // no more OOB exceptions are possible
+      final int step = 8;
+      final int remainder = len1 % step;
+      final int wlen = len1 - remainder;
+      // suffix first
+      for (int i = wlen; i < len1; i++) {
+        if (arg1[start1 + i] != arg2[start2 + i]) {
+          return false;
+        }
+      }
+      // SIMD loop
+      for (int i = 0; i < wlen; i += step) {
+        final int s1 = start1 + i;
+        final int s2 = start2 + i;
+        boolean neq = false;
+        for (int j = 0; j < step; j++) {
+          neq = (arg1[s1 + j] != arg2[s2 + j]) || neq;
+        }
+        if (neq) {
+          return false;
+        }
+      }
     }
+
     return true;
   }