You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/03/02 02:24:43 UTC
[incubator-doris] 02/04: [improvement][vec] better memequal impl to speed up string compare (#8229)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch dev-1.0.0
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 22746292fd73fa0a9fae40a8dde8eafd6a2a1f3c
Author: zbtzbtzbt <35...@users.noreply.github.com>
AuthorDate: Tue Mar 1 11:25:12 2022 +0800
[improvement][vec] better memequal impl to speed up string compare (#8229)
Similar to #8214.
Speeds up string comparison in the vectorized engine by using a faster memequal implementation.
---
be/src/runtime/string_value.hpp | 4 ++
be/src/vec/common/string_ref.h | 107 +++++++++++++---------------------------
2 files changed, 37 insertions(+), 74 deletions(-)
diff --git a/be/src/runtime/string_value.hpp b/be/src/runtime/string_value.hpp
index c44115d..aac9e3a 100644
--- a/be/src/runtime/string_value.hpp
+++ b/be/src/runtime/string_value.hpp
@@ -22,6 +22,7 @@
#include "runtime/string_value.h"
#include "util/cpu_info.h"
+#include "vec/common/string_ref.h"
#ifdef __SSE4_2__
#include "util/sse_util.hpp"
#endif
@@ -88,6 +89,9 @@ inline bool StringValue::eq(const StringValue& other) const {
if (this->len != other.len) {
return false;
}
+#if defined(__SSE2__)
+ return memequalSSE2Wide(this->ptr, other.ptr, this->len);
+#endif
return string_compare(this->ptr, this->len, other.ptr, other.len, this->len) == 0;
}
diff --git a/be/src/vec/common/string_ref.h b/be/src/vec/common/string_ref.h
index 5dd146e..8ecbe07 100644
--- a/be/src/vec/common/string_ref.h
+++ b/be/src/vec/common/string_ref.h
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
// This file is copied from
-// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/StringRef.h
+// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/StringRef.h
// and modified by Doris
#pragma once
@@ -98,6 +98,32 @@ inline bool compareSSE2x4(const char* p1, const char* p2) {
}
inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) {
+ /** The order of branches and the trick with overlapping comparisons
+ * are the same as in memcpy implementation.
+ * See the comments in
+ * https://github.com/ClickHouse/ClickHouse/blob/master/base/glibc-compatibility/memcpy/memcpy.h
+ */
+
+ if (size <= 16) {
+ if (size >= 8) {
+ /// Chunks of [8,16] bytes.
+ return unaligned_load<uint64_t>(p1) == unaligned_load<uint64_t>(p2) &&
+ unaligned_load<uint64_t>(p1 + size - 8) == unaligned_load<uint64_t>(p2 + size - 8);
+ } else if (size >= 4) {
+ /// Chunks of [4,7] bytes.
+ return unaligned_load<uint32_t>(p1) == unaligned_load<uint32_t>(p2) &&
+ unaligned_load<uint32_t>(p1 + size - 4) == unaligned_load<uint32_t>(p2 + size - 4);
+ } else if (size >= 2) {
+ /// Chunks of [2,3] bytes.
+ return unaligned_load<uint16_t>(p1) == unaligned_load<uint16_t>(p2) &&
+ unaligned_load<uint16_t>(p1 + size - 2) == unaligned_load<uint16_t>(p2 + size - 2);
+ } else if (size >= 1) {
+ /// A single byte.
+ return *p1 == *p2;
+ }
+ return true;
+ }
+
while (size >= 64) {
if (compareSSE2x4(p1, p2)) {
p1 += 64;
@@ -107,74 +133,14 @@ inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) {
return false;
}
- switch ((size % 64) / 16) {
- case 3:
- if (!compareSSE2(p1 + 32, p2 + 32)) return false;
- [[fallthrough]];
- case 2:
- if (!compareSSE2(p1 + 16, p2 + 16)) return false;
- [[fallthrough]];
- case 1:
- if (!compareSSE2(p1, p2)) return false;
- [[fallthrough]];
- case 0:
- break;
- }
-
- p1 += (size % 64) / 16 * 16;
- p2 += (size % 64) / 16 * 16;
-
- switch (size % 16) {
- case 15:
- if (p1[14] != p2[14]) return false;
- [[fallthrough]];
- case 14:
- if (p1[13] != p2[13]) return false;
- [[fallthrough]];
- case 13:
- if (p1[12] != p2[12]) return false;
- [[fallthrough]];
- case 12:
- if (unaligned_load<uint32_t>(p1 + 8) == unaligned_load<uint32_t>(p2 + 8))
- goto l8;
- else
- return false;
- case 11:
- if (p1[10] != p2[10]) return false;
- [[fallthrough]];
- case 10:
- if (p1[9] != p2[9]) return false;
- [[fallthrough]];
- case 9:
- if (p1[8] != p2[8]) return false;
- l8:
- [[fallthrough]];
- case 8:
- return unaligned_load<uint64_t>(p1) == unaligned_load<uint64_t>(p2);
- case 7:
- if (p1[6] != p2[6]) return false;
- [[fallthrough]];
- case 6:
- if (p1[5] != p2[5]) return false;
- [[fallthrough]];
- case 5:
- if (p1[4] != p2[4]) return false;
- [[fallthrough]];
- case 4:
- return unaligned_load<uint32_t>(p1) == unaligned_load<uint32_t>(p2);
- case 3:
- if (p1[2] != p2[2]) return false;
- [[fallthrough]];
- case 2:
- return unaligned_load<uint16_t>(p1) == unaligned_load<uint16_t>(p2);
- case 1:
- if (p1[0] != p2[0]) return false;
- [[fallthrough]];
- case 0:
- break;
+ switch (size / 16)
+ {
+ case 3: if (!compareSSE2(p1 + 32, p2 + 32)) return false; [[fallthrough]];
+ case 2: if (!compareSSE2(p1 + 16, p2 + 16)) return false; [[fallthrough]];
+ case 1: if (!compareSSE2(p1, p2)) return false;
}
- return true;
+ return compareSSE2(p1 + size - 16, p2 + size - 16);
}
#endif
@@ -322,13 +288,6 @@ inline void set(StringRef& x) {
}
} // namespace ZeroTraits
-inline bool operator==(StringRef lhs, const char* rhs) {
- for (size_t pos = 0; pos < lhs.size; ++pos)
- if (!rhs[pos] || lhs.data[pos] != rhs[pos]) return false;
-
- return true;
-}
-
inline std::ostream& operator<<(std::ostream& os, const StringRef& str) {
if (str.data) os.write(str.data, str.size);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org