You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by da...@apache.org on 2022/10/22 13:26:31 UTC

[doris] branch master updated: [Bug](sort) Fix bug in string sorter (#13548)

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a7c221d04e [Bug](sort) Fix bug in string sorter (#13548)
a7c221d04e is described below

commit a7c221d04e5fb5cea2556961c76c26f7b3b9eed5
Author: Gabriel <ga...@gmail.com>
AuthorDate: Sat Oct 22 21:26:23 2022 +0800

    [Bug](sort) Fix bug in string sorter (#13548)
---
 be/src/exprs/bloomfilter_predicate.h             | 32 +++++++++++++++++-------
 be/src/vec/core/sort_block.h                     |  5 ++--
 regression-test/data/query_p0/sort/sort.out      |  9 +++++++
 regression-test/suites/query_p0/sort/sort.groovy | 25 ++++++++++++++++++
 4 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/be/src/exprs/bloomfilter_predicate.h b/be/src/exprs/bloomfilter_predicate.h
index ab07fd8e1b..5ce3ce47d5 100644
--- a/be/src/exprs/bloomfilter_predicate.h
+++ b/be/src/exprs/bloomfilter_predicate.h
@@ -126,15 +126,7 @@ public:
         // If `_inited` is false, there is no memory allocated in bloom filter and this is the first
         // call for `merge` function. So we just reuse this bloom filter, and we don't need to
         // allocate memory again.
-        if (!_inited) {
-            auto other_func = static_cast<BloomFilterFuncBase*>(bloomfilter_func);
-            DCHECK(_bloom_filter == nullptr);
-            DCHECK(bloomfilter_func != nullptr);
-            _bloom_filter = bloomfilter_func->_bloom_filter;
-            _bloom_filter_alloced = other_func->_bloom_filter_alloced;
-            _inited = true;
-            return Status::OK();
-        } else {
+        if (_inited) {
             DCHECK(bloomfilter_func != nullptr);
             auto other_func = static_cast<BloomFilterFuncBase*>(bloomfilter_func);
             if (_bloom_filter_alloced != other_func->_bloom_filter_alloced) {
@@ -145,6 +137,28 @@ public:
             }
             return _bloom_filter->merge(other_func->_bloom_filter.get());
         }
+        {
+            std::lock_guard<std::mutex> l(_lock);
+            if (!_inited) {
+                auto other_func = static_cast<BloomFilterFuncBase*>(bloomfilter_func);
+                DCHECK(_bloom_filter == nullptr);
+                DCHECK(bloomfilter_func != nullptr);
+                _bloom_filter = bloomfilter_func->_bloom_filter;
+                _bloom_filter_alloced = other_func->_bloom_filter_alloced;
+                _inited = true;
+                return Status::OK();
+            } else {
+                DCHECK(bloomfilter_func != nullptr);
+                auto other_func = static_cast<BloomFilterFuncBase*>(bloomfilter_func);
+                if (_bloom_filter_alloced != other_func->_bloom_filter_alloced) {
+                    LOG(WARNING) << "bloom filter size not the same: already allocated bytes = "
+                                 << _bloom_filter_alloced << ", expected allocated bytes = "
+                                 << other_func->_bloom_filter_alloced;
+                    return Status::InvalidArgument("bloom filter size invalid");
+                }
+                return _bloom_filter->merge(other_func->_bloom_filter.get());
+            }
+        }
     }
 
     Status assign(const char* data, int len) {
diff --git a/be/src/vec/core/sort_block.h b/be/src/vec/core/sort_block.h
index ef21d99d92..9d4b9ca2b7 100644
--- a/be/src/vec/core/sort_block.h
+++ b/be/src/vec/core/sort_block.h
@@ -401,8 +401,9 @@ private:
                 return a.inline_value > b.inline_value ? 1
                                                        : (a.inline_value < b.inline_value ? -1 : 0);
             } else {
-                return memcmp_small_allow_overflow15(a.inline_value.data, a.inline_value.size,
-                                                     b.inline_value.data, b.inline_value.size);
+                return memcmp_small_allow_overflow15(
+                        reinterpret_cast<const UInt8*>(a.inline_value.data), a.inline_value.size,
+                        reinterpret_cast<const UInt8*>(b.inline_value.data), b.inline_value.size);
             }
         };
 
diff --git a/regression-test/data/query_p0/sort/sort.out b/regression-test/data/query_p0/sort/sort.out
new file mode 100644
index 0000000000..b4a5c9ee01
--- /dev/null
+++ b/regression-test/data/query_p0/sort/sort.out
@@ -0,0 +1,9 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sort_string_single_column --
+2022-01-01
+汇总
+
+-- !sort_string_multiple_columns --
+2022-01-01	1
+汇总	1
+
diff --git a/regression-test/suites/query_p0/sort/sort.groovy b/regression-test/suites/query_p0/sort/sort.groovy
new file mode 100644
index 0000000000..b967c93137
--- /dev/null
+++ b/regression-test/suites/query_p0/sort/sort.groovy
@@ -0,0 +1,25 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// The cases is copied from https://github.com/trinodb/trino/tree/master
+// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/aggregate
+// and modified by Doris.
+
+suite("sort") {
+    qt_sort_string_single_column """ select * from ( select '汇总' as a union all select '2022-01-01' as a ) a order by 1 """
+    qt_sort_string_multiple_columns """ select * from ( select '汇总' as a,1 as b union all select '2022-01-01' as a,1 as b ) a order by 1,2 """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org