You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2023/04/08 08:04:17 UTC

[doris] branch master updated: [Optimization](string) optimize constant empty string compare ( column='', column!='') (#18321)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 58bbd46c65 [Optimization](string) optimize constant empty string compare ( column='',  column!='') (#18321)
58bbd46c65 is described below

commit 58bbd46c65805c2de394b8da331033520fc1bd35
Author: ZhangYu0123 <67...@users.noreply.github.com>
AuthorDate: Sat Apr 8 16:04:10 2023 +0800

    [Optimization](string) optimize constant empty string compare ( column='',  column!='') (#18321)
    
    Optimize constant empty string compare:
    (1) When the constant is the empty string '' (size is 0), we can compare adjacent offsets directly using SIMD.
    
    q10: SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
    q11: SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
    q12: SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
    q13: SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
    q14: SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
    Issue Number: close #xxx
---
 be/src/vec/functions/functions_comparison.h | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/be/src/vec/functions/functions_comparison.h b/be/src/vec/functions/functions_comparison.h
index 714467c576..1d72a5d5b8 100644
--- a/be/src/vec/functions/functions_comparison.h
+++ b/be/src/vec/functions/functions_comparison.h
@@ -211,15 +211,23 @@ struct StringEqualsImpl {
                                                  ColumnString::Offset b_size,
                                                  PaddedPODArray<UInt8>& c) {
         size_t size = a_offsets.size();
-        ColumnString::Offset prev_a_offset = 0;
-
-        for (size_t i = 0; i < size; ++i) {
-            auto a_size = a_offsets[i] - prev_a_offset;
-
-            c[i] = positive == memequal_small_allow_overflow15(a_data.data() + prev_a_offset,
-                                                               a_size, b_data.data(), b_size);
-
-            prev_a_offset = a_offsets[i];
+        if (b_size == 0) {
+            auto* __restrict data = c.data();
+            auto* __restrict offsets = a_offsets.data();
+            for (size_t i = 0; i < size; ++i) {
+                data[i] =
+                        positive ? (offsets[i] == offsets[i - 1]) : (offsets[i] != offsets[i - 1]);
+            }
+        } else {
+            ColumnString::Offset prev_a_offset = 0;
+            const auto* a_pos = a_data.data();
+            const auto* b_pos = b_data.data();
+            for (size_t i = 0; i < size; ++i) {
+                auto a_size = a_offsets[i] - prev_a_offset;
+                c[i] = positive == memequal_small_allow_overflow15(a_pos + prev_a_offset, a_size,
+                                                                   b_pos, b_size);
+                prev_a_offset = a_offsets[i];
+            }
         }
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org