You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/07/13 13:05:44 UTC

[doris] 14/14: [fix](merge-on-write) fix wrong result when query with prefix key predicate (#21770)

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 3eec3fe70f4d1ed394c58346dc60f411e50a1c80
Author: Xin Liao <li...@126.com>
AuthorDate: Thu Jul 13 19:56:00 2023 +0800

    [fix](merge-on-write) fix wrong result when query with prefix key predicate (#21770)
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |   4 +-
 be/src/util/key_util.h                             |   7 +-
 .../correctness_p0/test_select_with_prefix.out     |  93 +++++++++++++
 .../correctness_p0/test_select_with_prefix.groovy  | 149 +++++++++++++++++++++
 4 files changed, 251 insertions(+), 2 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 9b8ec54b24..1c6bfbadb0 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1149,8 +1149,10 @@ Status SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool
     DCHECK(pk_index_reader != nullptr);
 
     std::string index_key;
+    // When is_include is false, we should append KEY_NORMAL_MARKER to the
+    // encoded key. Otherwise, we will get an incorrect upper bound.
     encode_key_with_padding<RowCursor, true, true>(
-            &index_key, key, _segment->_tablet_schema->num_key_columns(), is_include);
+            &index_key, key, _segment->_tablet_schema->num_key_columns(), is_include, true);
     if (index_key < _segment->min_key()) {
         *rowid = 0;
         return Status::OK();
diff --git a/be/src/util/key_util.h b/be/src/util/key_util.h
index ccd67b9cdd..3648a45c56 100644
--- a/be/src/util/key_util.h
+++ b/be/src/util/key_util.h
@@ -59,15 +59,20 @@ constexpr uint8_t KEY_MAXIMAL_MARKER = 0xFF;
 // fill a marker and return. If padding_minimal is true, KEY_MINIMAL_MARKER will
 // be added, if padding_minimal is false, KEY_MAXIMAL_MARKER will be added.
 // If all num_keys are found in row, no marker will be added.
+// If padding_minimal is false and padding_normal_marker is true,
+// KEY_NORMAL_MARKER will be added before KEY_MAXIMAL_MARKER.
 template <typename RowType, bool null_first = true, bool full_encode = false>
 void encode_key_with_padding(std::string* buf, const RowType& row, size_t num_keys,
-                             bool padding_minimal) {
+                             bool padding_minimal, bool padding_normal_marker = false) {
     for (auto cid = 0; cid < num_keys; cid++) {
         auto field = row.schema()->column(cid);
         if (field == nullptr) {
             if (padding_minimal) {
                 buf->push_back(KEY_MINIMAL_MARKER);
             } else {
+                if (padding_normal_marker) {
+                    buf->push_back(KEY_NORMAL_MARKER);
+                }
                 buf->push_back(KEY_MAXIMAL_MARKER);
             }
             break;
diff --git a/regression-test/data/correctness_p0/test_select_with_prefix.out b/regression-test/data/correctness_p0/test_select_with_prefix.out
new file mode 100644
index 0000000000..8b59f59f49
--- /dev/null
+++ b/regression-test/data/correctness_p0/test_select_with_prefix.out
@@ -0,0 +1,93 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+abcd	efgh	1	abcdad	2023-06-27T00:00	1
+abcd	efgh	2	abcdad	2023-06-27T00:00	1
+abcd	efghf	2	abcdad	2023-06-27T00:00	1
+
+-- !sql --
+abcd
+abcd
+abcd
+
+-- !sql --
+
+-- !sql --
+efgh
+efgh
+efghf
+
+-- !sql --
+	efgh	1	abcdad	2023-06-27T00:00	1
+	mefgh	1	abcdad	2023-06-27T00:00	1
+
+-- !sql --
+
+
+
+-- !sql --
+2
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+	efgh	1	abcdad	2023-06-27T00:00	1
+abcd	efgh	1	abcdad	2023-06-27T00:00	1
+abcd	efgh	2	abcdad	2023-06-27T00:00	1
+
+-- !sql --
+
+abcd
+abcd
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+abcd	efgh	1	abcdad	2023-06-27T00:00	1
+abcd	efgh	2	abcdad	2023-06-27T00:00	1
+abcd	efghf	2	abcdad	2023-06-27T00:00	1
+
+-- !sql --
+abcd
+abcd
+abcd
+
+-- !sql --
+
+-- !sql --
+efgh
+efgh
+efghf
+
+-- !sql --
+	efgh	1	abcdad	2023-06-27T00:00	1
+	mefgh	1	abcdad	2023-06-27T00:00	1
+
+-- !sql --
+
+
+
+-- !sql --
+2
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+	efgh	1	abcdad	2023-06-27T00:00	1
+abcd	efgh	1	abcdad	2023-06-27T00:00	1
+abcd	efgh	2	abcdad	2023-06-27T00:00	1
+
+-- !sql --
+
+abcd
+abcd
+
diff --git a/regression-test/suites/correctness_p0/test_select_with_prefix.groovy b/regression-test/suites/correctness_p0/test_select_with_prefix.groovy
new file mode 100644
index 0000000000..c12e0da4c1
--- /dev/null
+++ b/regression-test/suites/correctness_p0/test_select_with_prefix.groovy
@@ -0,0 +1,149 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_select_with_prefix") {
+    def tableName = "test_select_with_perfix"
+    sql """ DROP TABLE IF EXISTS $tableName """
+    sql """
+        CREATE TABLE $tableName (
+       `c0` varchar(64) NULL,
+       `c1` varchar(64) NULL,
+       `c2` bigint(20) NOT NULL,
+       `c3` char(32) NULL,
+       `c4` datetime  NULL,
+       `c5` tinyint(4) NOT NULL
+     ) ENGINE=OLAP
+     UNIQUE KEY(`c0`, `c1`, `c2`, `c3`)
+     COMMENT 'OLAP'
+     DISTRIBUTED BY HASH(`c0`) BUCKETS 1
+     PROPERTIES (
+     "replication_allocation" = "tag.location.default: 1",
+     "enable_unique_key_merge_on_write" = "true"
+     );
+    """
+
+    sql """
+         INSERT INTO $tableName (`c0`, `c1`, `c2`, `c3`, `c4`, `c5`) VALUES
+             ('', 'efgh', 1, 'abcdad', '2023-06-27 00:00:00', 1),
+             ('', 'mefgh', 1, 'abcdad', '2023-06-27 00:00:00', 1),
+             ('abcd', 'efgh', 1, 'abcdad', '2023-06-27 00:00:00', 1),
+             ('abcd', 'efgh', 2, 'abcdad', '2023-06-27 00:00:00', 1),
+             ('abcd', 'efghf', 2, 'abcdad', '2023-06-27 00:00:00', 1),
+             ('abcde', 'ab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+             ('abcdf', 'ab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+             ('abcdf', 'dab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+             ('abce', 'dab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+             ('abce', 'ldab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+             ('abd', 'ldab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+             ('def', 'hh', 2, 'afad', '2023-06-27 00:00:00', 1)
+    """
+
+    sql "sync"
+
+    qt_sql "select * from $tableName where c0='abc'"
+
+    qt_sql "select c0 from $tableName where c0='abc'"
+
+    qt_sql "select * from $tableName where c0='abcd'"
+
+    qt_sql "select c0 from $tableName where c0='abcd'"
+
+    qt_sql "select c1 from $tableName where c0='abc'"
+
+    qt_sql "select c1 from $tableName where c0='abcd'"
+
+    qt_sql "select * from $tableName where c0=''"
+
+    qt_sql "select c0 from $tableName where c0=''"
+    
+    qt_sql "select count(*) from $tableName where c0=''"
+
+    qt_sql "select * from $tableName where c1='efg'"
+
+    qt_sql "select c0 from $tableName where c1='efg'"
+
+    qt_sql "select * from $tableName where c1='efgh'"
+
+    qt_sql "select c0 from $tableName where c1='efgh'"
+
+    sql """ DROP TABLE IF EXISTS $tableName """
+    sql """
+        CREATE TABLE $tableName (
+       `c0` varchar(64) NULL,
+       `c1` varchar(64) NULL,
+       `c2` bigint(20) NOT NULL,
+       `c3` char(32) NULL,
+       `c4` datetime  NULL,
+       `c5` tinyint(4) NOT NULL
+     ) ENGINE=OLAP
+     UNIQUE KEY(`c0`, `c1`, `c2`, `c3`)
+     COMMENT 'OLAP'
+     DISTRIBUTED BY HASH(`c0`) BUCKETS 1
+     PROPERTIES (
+     "replication_allocation" = "tag.location.default: 1",
+     "enable_unique_key_merge_on_write" = "false"
+     );
+    """
+
+    sql """
+         INSERT INTO $tableName (`c0`, `c1`, `c2`, `c3`, `c4`, `c5`) VALUES
+             ('', 'efgh', 1, 'abcdad', '2023-06-27 00:00:00', 1),
+             ('', 'mefgh', 1, 'abcdad', '2023-06-27 00:00:00', 1),
+             ('abcd', 'efgh', 1, 'abcdad', '2023-06-27 00:00:00', 1),
+             ('abcd', 'efgh', 2, 'abcdad', '2023-06-27 00:00:00', 1),
+             ('abcd', 'efghf', 2, 'abcdad', '2023-06-27 00:00:00', 1),
+             ('abcde', 'ab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+             ('abcdf', 'ab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+             ('abcdf', 'dab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+             ('abce', 'dab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+             ('abce', 'ldab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+             ('abd', 'ldab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+             ('def', 'hh', 2, 'afad', '2023-06-27 00:00:00', 1)
+    """
+
+    sql "sync"
+
+    qt_sql "select * from $tableName where c0='abc'"
+
+    qt_sql "select c0 from $tableName where c0='abc'"
+
+    qt_sql "select * from $tableName where c0='abcd'"
+
+    qt_sql "select c0 from $tableName where c0='abcd'"
+
+    qt_sql "select c1 from $tableName where c0='abc'"
+
+    qt_sql "select c1 from $tableName where c0='abcd'"
+
+    qt_sql "select * from $tableName where c0=''"
+
+    qt_sql "select c0 from $tableName where c0=''"
+    
+    qt_sql "select count(*) from $tableName where c0=''"
+
+    qt_sql "select * from $tableName where c1='efg'"
+
+    qt_sql "select c0 from $tableName where c1='efg'"
+
+    qt_sql "select * from $tableName where c1='efgh'"
+
+    qt_sql "select c0 from $tableName where c1='efgh'"
+
+    sql """ DROP TABLE IF EXISTS $tableName """
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org