You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by kx...@apache.org on 2023/07/13 13:05:44 UTC
[doris] 14/14: [fix](merge-on-write) fix wrong result when query with prefix key predicate (#21770)
This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
commit 3eec3fe70f4d1ed394c58346dc60f411e50a1c80
Author: Xin Liao <li...@126.com>
AuthorDate: Thu Jul 13 19:56:00 2023 +0800
[fix](merge-on-write) fix wrong result when query with prefix key predicate (#21770)
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 4 +-
be/src/util/key_util.h | 7 +-
.../correctness_p0/test_select_with_prefix.out | 93 +++++++++++++
.../correctness_p0/test_select_with_prefix.groovy | 149 +++++++++++++++++++++
4 files changed, 251 insertions(+), 2 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 9b8ec54b24..1c6bfbadb0 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1149,8 +1149,10 @@ Status SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool
DCHECK(pk_index_reader != nullptr);
std::string index_key;
+ // When is_include is false, we should append KEY_NORMAL_MARKER to the
+ // encoded key. Otherwise, we will get an incorrect upper bound.
encode_key_with_padding<RowCursor, true, true>(
- &index_key, key, _segment->_tablet_schema->num_key_columns(), is_include);
+ &index_key, key, _segment->_tablet_schema->num_key_columns(), is_include, true);
if (index_key < _segment->min_key()) {
*rowid = 0;
return Status::OK();
diff --git a/be/src/util/key_util.h b/be/src/util/key_util.h
index ccd67b9cdd..3648a45c56 100644
--- a/be/src/util/key_util.h
+++ b/be/src/util/key_util.h
@@ -59,15 +59,20 @@ constexpr uint8_t KEY_MAXIMAL_MARKER = 0xFF;
// fill a marker and return. If padding_minimal is true, KEY_MINIMAL_MARKER will
// be added, if padding_minimal is false, KEY_MAXIMAL_MARKER will be added.
// If all num_keys are found in row, no marker will be added.
+// If padding_minimal is false and padding_normal_marker is true,
+// KEY_NORMAL_MARKER will be added before KEY_MAXIMAL_MARKER.
template <typename RowType, bool null_first = true, bool full_encode = false>
void encode_key_with_padding(std::string* buf, const RowType& row, size_t num_keys,
- bool padding_minimal) {
+ bool padding_minimal, bool padding_normal_marker = false) {
for (auto cid = 0; cid < num_keys; cid++) {
auto field = row.schema()->column(cid);
if (field == nullptr) {
if (padding_minimal) {
buf->push_back(KEY_MINIMAL_MARKER);
} else {
+ if (padding_normal_marker) {
+ buf->push_back(KEY_NORMAL_MARKER);
+ }
buf->push_back(KEY_MAXIMAL_MARKER);
}
break;
diff --git a/regression-test/data/correctness_p0/test_select_with_prefix.out b/regression-test/data/correctness_p0/test_select_with_prefix.out
new file mode 100644
index 0000000000..8b59f59f49
--- /dev/null
+++ b/regression-test/data/correctness_p0/test_select_with_prefix.out
@@ -0,0 +1,93 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+abcd efgh 1 abcdad 2023-06-27T00:00 1
+abcd efgh 2 abcdad 2023-06-27T00:00 1
+abcd efghf 2 abcdad 2023-06-27T00:00 1
+
+-- !sql --
+abcd
+abcd
+abcd
+
+-- !sql --
+
+-- !sql --
+efgh
+efgh
+efghf
+
+-- !sql --
+ efgh 1 abcdad 2023-06-27T00:00 1
+ mefgh 1 abcdad 2023-06-27T00:00 1
+
+-- !sql --
+
+
+
+-- !sql --
+2
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+ efgh 1 abcdad 2023-06-27T00:00 1
+abcd efgh 1 abcdad 2023-06-27T00:00 1
+abcd efgh 2 abcdad 2023-06-27T00:00 1
+
+-- !sql --
+
+abcd
+abcd
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+abcd efgh 1 abcdad 2023-06-27T00:00 1
+abcd efgh 2 abcdad 2023-06-27T00:00 1
+abcd efghf 2 abcdad 2023-06-27T00:00 1
+
+-- !sql --
+abcd
+abcd
+abcd
+
+-- !sql --
+
+-- !sql --
+efgh
+efgh
+efghf
+
+-- !sql --
+ efgh 1 abcdad 2023-06-27T00:00 1
+ mefgh 1 abcdad 2023-06-27T00:00 1
+
+-- !sql --
+
+
+
+-- !sql --
+2
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+ efgh 1 abcdad 2023-06-27T00:00 1
+abcd efgh 1 abcdad 2023-06-27T00:00 1
+abcd efgh 2 abcdad 2023-06-27T00:00 1
+
+-- !sql --
+
+abcd
+abcd
+
diff --git a/regression-test/suites/correctness_p0/test_select_with_prefix.groovy b/regression-test/suites/correctness_p0/test_select_with_prefix.groovy
new file mode 100644
index 0000000000..c12e0da4c1
--- /dev/null
+++ b/regression-test/suites/correctness_p0/test_select_with_prefix.groovy
@@ -0,0 +1,149 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_select_with_prefix") {
+ def tableName = "test_select_with_perfix"
+ sql """ DROP TABLE IF EXISTS $tableName """
+ sql """
+ CREATE TABLE $tableName (
+ `c0` varchar(64) NULL,
+ `c1` varchar(64) NULL,
+ `c2` bigint(20) NOT NULL,
+ `c3` char(32) NULL,
+ `c4` datetime NULL,
+ `c5` tinyint(4) NOT NULL
+ ) ENGINE=OLAP
+ UNIQUE KEY(`c0`, `c1`, `c2`, `c3`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`c0`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "enable_unique_key_merge_on_write" = "true"
+ );
+ """
+
+ sql """
+ INSERT INTO $tableName (`c0`, `c1`, `c2`, `c3`, `c4`, `c5`) VALUES
+ ('', 'efgh', 1, 'abcdad', '2023-06-27 00:00:00', 1),
+ ('', 'mefgh', 1, 'abcdad', '2023-06-27 00:00:00', 1),
+ ('abcd', 'efgh', 1, 'abcdad', '2023-06-27 00:00:00', 1),
+ ('abcd', 'efgh', 2, 'abcdad', '2023-06-27 00:00:00', 1),
+ ('abcd', 'efghf', 2, 'abcdad', '2023-06-27 00:00:00', 1),
+ ('abcde', 'ab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+ ('abcdf', 'ab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+ ('abcdf', 'dab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+ ('abce', 'dab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+ ('abce', 'ldab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+ ('abd', 'ldab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+ ('def', 'hh', 2, 'afad', '2023-06-27 00:00:00', 1)
+ """
+
+ sql "sync"
+
+ qt_sql "select * from $tableName where c0='abc'"
+
+ qt_sql "select c0 from $tableName where c0='abc'"
+
+ qt_sql "select * from $tableName where c0='abcd'"
+
+ qt_sql "select c0 from $tableName where c0='abcd'"
+
+ qt_sql "select c1 from $tableName where c0='abc'"
+
+ qt_sql "select c1 from $tableName where c0='abcd'"
+
+ qt_sql "select * from $tableName where c0=''"
+
+ qt_sql "select c0 from $tableName where c0=''"
+
+ qt_sql "select count(*) from $tableName where c0=''"
+
+ qt_sql "select * from $tableName where c1='efg'"
+
+ qt_sql "select c0 from $tableName where c1='efg'"
+
+ qt_sql "select * from $tableName where c1='efgh'"
+
+ qt_sql "select c0 from $tableName where c1='efgh'"
+
+ sql """ DROP TABLE IF EXISTS $tableName """
+ sql """
+ CREATE TABLE $tableName (
+ `c0` varchar(64) NULL,
+ `c1` varchar(64) NULL,
+ `c2` bigint(20) NOT NULL,
+ `c3` char(32) NULL,
+ `c4` datetime NULL,
+ `c5` tinyint(4) NOT NULL
+ ) ENGINE=OLAP
+ UNIQUE KEY(`c0`, `c1`, `c2`, `c3`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`c0`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "enable_unique_key_merge_on_write" = "false"
+ );
+ """
+
+ sql """
+ INSERT INTO $tableName (`c0`, `c1`, `c2`, `c3`, `c4`, `c5`) VALUES
+ ('', 'efgh', 1, 'abcdad', '2023-06-27 00:00:00', 1),
+ ('', 'mefgh', 1, 'abcdad', '2023-06-27 00:00:00', 1),
+ ('abcd', 'efgh', 1, 'abcdad', '2023-06-27 00:00:00', 1),
+ ('abcd', 'efgh', 2, 'abcdad', '2023-06-27 00:00:00', 1),
+ ('abcd', 'efghf', 2, 'abcdad', '2023-06-27 00:00:00', 1),
+ ('abcde', 'ab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+ ('abcdf', 'ab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+ ('abcdf', 'dab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+ ('abce', 'dab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+ ('abce', 'ldab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+ ('abd', 'ldab', 2, '2efraeef', '2023-06-27 00:00:00', 1),
+ ('def', 'hh', 2, 'afad', '2023-06-27 00:00:00', 1)
+ """
+
+ sql "sync"
+
+ qt_sql "select * from $tableName where c0='abc'"
+
+ qt_sql "select c0 from $tableName where c0='abc'"
+
+ qt_sql "select * from $tableName where c0='abcd'"
+
+ qt_sql "select c0 from $tableName where c0='abcd'"
+
+ qt_sql "select c1 from $tableName where c0='abc'"
+
+ qt_sql "select c1 from $tableName where c0='abcd'"
+
+ qt_sql "select * from $tableName where c0=''"
+
+ qt_sql "select c0 from $tableName where c0=''"
+
+ qt_sql "select count(*) from $tableName where c0=''"
+
+ qt_sql "select * from $tableName where c1='efg'"
+
+ qt_sql "select c0 from $tableName where c1='efg'"
+
+ qt_sql "select * from $tableName where c1='efgh'"
+
+ qt_sql "select c0 from $tableName where c1='efgh'"
+
+ sql """ DROP TABLE IF EXISTS $tableName """
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org