You are viewing a plain text version of this content. The canonical version is available in the Apache mailing list archive for commits@doris.apache.org.
Posted to commits@doris.apache.org by kx...@apache.org on 2024/01/26 04:56:43 UTC

(doris) branch branch-2.0 updated: [fix](phrase_prefix) fix match_phrase_prefix query incorrect result (#29946) (#30223)

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 54d1081b6ae [fix](phrase_prefix) fix match_phrase_prefix query incorrect result (#29946) (#30223)
54d1081b6ae is described below

commit 54d1081b6ae8f387e9eb16766f92be3ce39d6ca0
Author: zzzxl <33...@users.noreply.github.com>
AuthorDate: Fri Jan 26 12:56:36 2024 +0800

    [fix](phrase_prefix) fix match_phrase_prefix query incorrect result (#29946) (#30223)
---
 .../inverted_index/query/phrase_prefix_query.cpp   |  5 +-
 .../test_index_match_phrase_prefix_1.out           | 13 +++++
 .../test_index_match_phrase_prefix_1.groovy        | 62 ++++++++++++++++++++++
 3 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
index 4b0340cda4a..7ac2aff3e14 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
@@ -17,6 +17,7 @@
 
 #include "phrase_prefix_query.h"
 
+#include "CLucene/util/stringUtil.h"
 #include "olap/rowset//segment_v2/inverted_index/query/prefix_query.h"
 
 namespace doris {
@@ -42,7 +43,9 @@ void PhrasePrefixQuery::add(const std::wstring& field_name, const std::vector<st
             PrefixQuery::get_prefix_terms(_searcher->getReader(), field_name, terms[i],
                                           prefix_terms, _max_expansions);
             if (prefix_terms.empty()) {
-                continue;
+                std::wstring ws_term = StringUtil::string_to_wstring(terms[i]);
+                Term* t = _CLNEW Term(field_name.c_str(), ws_term.c_str());
+                prefix_terms.push_back(t);
             }
             _query.add(prefix_terms);
             for (auto& t : prefix_terms) {
diff --git a/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix_1.out b/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix_1.out
new file mode 100644
index 00000000000..e7e934f394e
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_index_match_phrase_prefix_1.out
@@ -0,0 +1,13 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+7
+
+-- !sql --
+7
+
+-- !sql --
+6
+
+-- !sql --
+6
+
diff --git a/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix_1.groovy b/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix_1.groovy
new file mode 100644
index 00000000000..1e2ac0d7900
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_match_phrase_prefix_1.groovy
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_index_match_phrase_prefix_1", "p0"){
+    def indexTbName1 = "test_index_match_phrase_prefix_1"
+
+    sql "DROP TABLE IF EXISTS ${indexTbName1}"
+
+    sql """
+      CREATE TABLE ${indexTbName1} (
+      `a` int(11) NULL COMMENT "",
+      `b` string NULL COMMENT "",
+      `c` string NULL COMMENT "",
+      `d` string NULL COMMENT "",
+      INDEX b_idx (`b`) USING INVERTED COMMENT '',
+      INDEX c_idx (`c`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '',
+      INDEX d_idx (`d`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
+      ) ENGINE=OLAP
+      DUPLICATE KEY(`a`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY RANDOM BUCKETS 1
+      PROPERTIES (
+      "replication_allocation" = "tag.location.default: 1"
+      );
+    """
+
+    sql """ INSERT INTO ${indexTbName1} VALUES (1, "O1704361998540E2Cemx9S", "O1704361998540E2Cemx9S", "O1704361998540E2Cemx9S"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (2, "O1704361998540E2Cemx9S)123456789", "O1704361998540E2Cemx9S)123456789", "O1704361998540E2Cemx9S)123456789"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (3, "O1704361998540E2Cemx9S=123456789", "O1704361998540E2Cemx9S=123456789", "O1704361998540E2Cemx9S=123456789"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (4, "O1704361998540E2Cemx9S+123456789", "O1704361998540E2Cemx9S+123456789", "O1704361998540E2Cemx9S+123456789"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (5, "O1704361998540E2Cemx9S!123456789", "O1704361998540E2Cemx9S!123456789", "O1704361998540E2Cemx9S!123456789"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (6, "O1704361998540E2Cemx9S 123456789", "O1704361998540E2Cemx9S 123456789", "O1704361998540E2Cemx9S 123456789"); """
+    sql """ INSERT INTO ${indexTbName1} VALUES (7, "O1704361998540E2Cemx9S*123456789", "O1704361998540E2Cemx9S*123456789", "O1704361998540E2Cemx9S*123456789"); """
+
+    try {
+        sql "sync"
+
+        qt_sql """ select count() from ${indexTbName1} where c match_phrase_prefix 'O1704361998540E2Cemx9S'; """
+        qt_sql """ select count() from ${indexTbName1} where d match_phrase_prefix 'O1704361998540E2Cemx9S'; """
+
+        qt_sql """ select count() from ${indexTbName1} where c match_phrase_prefix 'O1704361998540E2Cemx9S=123456789'; """
+        qt_sql """ select count() from ${indexTbName1} where d match_phrase_prefix 'O1704361998540E2Cemx9S=123456789'; """
+
+    } finally {
+        //try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org