You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/03/01 03:25:25 UTC

[incubator-doris] branch master updated: [improvement][vec] better memequal impl to speed up string compare (#8229)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ada39dd  [improvement][vec] better memequal impl to speed up string compare (#8229)
ada39dd is described below

commit ada39dd9adf6108a7015c192f4878766af71c488
Author: zbtzbtzbt <35...@users.noreply.github.com>
AuthorDate: Tue Mar 1 11:25:12 2022 +0800

    [improvement][vec] better memequal impl to speed up string compare (#8229)
    
    Similar to #8214.
    
    Makes the string comparison operator in the vectorized (vec) engine faster.
---
 be/src/runtime/string_value.hpp |   4 ++
 be/src/vec/common/string_ref.h  | 107 +++++++++++++---------------------------
 2 files changed, 37 insertions(+), 74 deletions(-)

diff --git a/be/src/runtime/string_value.hpp b/be/src/runtime/string_value.hpp
index c44115d..aac9e3a 100644
--- a/be/src/runtime/string_value.hpp
+++ b/be/src/runtime/string_value.hpp
@@ -22,6 +22,7 @@
 
 #include "runtime/string_value.h"
 #include "util/cpu_info.h"
+#include "vec/common/string_ref.h"
 #ifdef __SSE4_2__
 #include "util/sse_util.hpp"
 #endif
@@ -88,6 +89,9 @@ inline bool StringValue::eq(const StringValue& other) const {
     if (this->len != other.len) {
         return false;
     }
+#if defined(__SSE2__)
+    return memequalSSE2Wide(this->ptr, other.ptr, this->len);
+#endif
 
     return string_compare(this->ptr, this->len, other.ptr, other.len, this->len) == 0;
 }
diff --git a/be/src/vec/common/string_ref.h b/be/src/vec/common/string_ref.h
index 5dd146e..8ecbe07 100644
--- a/be/src/vec/common/string_ref.h
+++ b/be/src/vec/common/string_ref.h
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 // This file is copied from
-// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/StringRef.h
+// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/StringRef.h
 // and modified by Doris
 
 #pragma once
@@ -98,6 +98,32 @@ inline bool compareSSE2x4(const char* p1, const char* p2) {
 }
 
 inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) {
+    /** The order of branches and the trick with overlapping comparisons
+      * are the same as in memcpy implementation.
+      * See the comments in
+      * https://github.com/ClickHouse/ClickHouse/blob/master/base/glibc-compatibility/memcpy/memcpy.h
+      */
+
+    if (size <= 16) {
+        if (size >= 8) {
+            /// Chunks of [8,16] bytes.
+            return unaligned_load<uint64_t>(p1) == unaligned_load<uint64_t>(p2) &&
+                   unaligned_load<uint64_t>(p1 + size - 8) == unaligned_load<uint64_t>(p2 + size - 8);
+        } else if (size >= 4) {
+            /// Chunks of [4,7] bytes.
+            return unaligned_load<uint32_t>(p1) == unaligned_load<uint32_t>(p2) &&
+                   unaligned_load<uint32_t>(p1 + size - 4) == unaligned_load<uint32_t>(p2 + size - 4);
+        } else if (size >= 2) {
+            /// Chunks of [2,3] bytes.
+            return unaligned_load<uint16_t>(p1) == unaligned_load<uint16_t>(p2) &&
+                   unaligned_load<uint16_t>(p1 + size - 2) == unaligned_load<uint16_t>(p2 + size - 2);
+        } else if (size >= 1) {
+            /// A single byte.
+            return *p1 == *p2;
+        }
+        return true;
+    }
+    
     while (size >= 64) {
         if (compareSSE2x4(p1, p2)) {
             p1 += 64;
@@ -107,74 +133,14 @@ inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) {
             return false;
     }
 
-    switch ((size % 64) / 16) {
-    case 3:
-        if (!compareSSE2(p1 + 32, p2 + 32)) return false;
-        [[fallthrough]];
-    case 2:
-        if (!compareSSE2(p1 + 16, p2 + 16)) return false;
-        [[fallthrough]];
-    case 1:
-        if (!compareSSE2(p1, p2)) return false;
-        [[fallthrough]];
-    case 0:
-        break;
-    }
-
-    p1 += (size % 64) / 16 * 16;
-    p2 += (size % 64) / 16 * 16;
-
-    switch (size % 16) {
-    case 15:
-        if (p1[14] != p2[14]) return false;
-        [[fallthrough]];
-    case 14:
-        if (p1[13] != p2[13]) return false;
-        [[fallthrough]];
-    case 13:
-        if (p1[12] != p2[12]) return false;
-        [[fallthrough]];
-    case 12:
-        if (unaligned_load<uint32_t>(p1 + 8) == unaligned_load<uint32_t>(p2 + 8))
-            goto l8;
-        else
-            return false;
-    case 11:
-        if (p1[10] != p2[10]) return false;
-        [[fallthrough]];
-    case 10:
-        if (p1[9] != p2[9]) return false;
-        [[fallthrough]];
-    case 9:
-        if (p1[8] != p2[8]) return false;
-    l8:
-        [[fallthrough]];
-    case 8:
-        return unaligned_load<uint64_t>(p1) == unaligned_load<uint64_t>(p2);
-    case 7:
-        if (p1[6] != p2[6]) return false;
-        [[fallthrough]];
-    case 6:
-        if (p1[5] != p2[5]) return false;
-        [[fallthrough]];
-    case 5:
-        if (p1[4] != p2[4]) return false;
-        [[fallthrough]];
-    case 4:
-        return unaligned_load<uint32_t>(p1) == unaligned_load<uint32_t>(p2);
-    case 3:
-        if (p1[2] != p2[2]) return false;
-        [[fallthrough]];
-    case 2:
-        return unaligned_load<uint16_t>(p1) == unaligned_load<uint16_t>(p2);
-    case 1:
-        if (p1[0] != p2[0]) return false;
-        [[fallthrough]];
-    case 0:
-        break;
+    switch (size / 16)
+    {
+        case 3: if (!compareSSE2(p1 + 32, p2 + 32)) return false; [[fallthrough]];
+        case 2: if (!compareSSE2(p1 + 16, p2 + 16)) return false; [[fallthrough]];
+        case 1: if (!compareSSE2(p1, p2)) return false;
     }
 
-    return true;
+    return compareSSE2(p1 + size - 16, p2 + size - 16);
 }
 
 #endif
@@ -322,13 +288,6 @@ inline void set(StringRef& x) {
 }
 } // namespace ZeroTraits
 
-inline bool operator==(StringRef lhs, const char* rhs) {
-    for (size_t pos = 0; pos < lhs.size; ++pos)
-        if (!rhs[pos] || lhs.data[pos] != rhs[pos]) return false;
-
-    return true;
-}
-
 inline std::ostream& operator<<(std::ostream& os, const StringRef& str) {
     if (str.data) os.write(str.data, str.size);
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org